349 lines
7.9 KiB
Diff
349 lines
7.9 KiB
Diff
|
|
From 6e66b445df0d39b9e796d1a4afcbe617197278de Mon Sep 17 00:00:00 2001
|
||
|
|
From: Qi Tao <taoqi10@huawei.com>
|
||
|
|
Date: Wed, 20 Mar 2024 16:13:45 +0800
|
||
|
|
Subject: [PATCH 27/44] cipher: add support for SM4(ECB) algorithm in CE
|
||
|
|
instruction
|
||
|
|
|
||
|
|
Use the ARMv8 Crypto Extension (CE) SM4 instructions
|
||
|
|
to accelerate the SM4 algorithm in ECB mode.
|
||
|
|
|
||
|
|
Signed-off-by: Qi Tao <taoqi10@huawei.com>
|
||
|
|
---
|
||
|
|
drv/isa_ce_sm4.c | 17 +++
|
||
|
|
drv/isa_ce_sm4.h | 2 +
|
||
|
|
drv/isa_ce_sm4_armv8.S | 263 +++++++++++++++++++++++++++++++++++++++++
|
||
|
|
3 files changed, 282 insertions(+)
|
||
|
|
|
||
|
|
diff --git a/drv/isa_ce_sm4.c b/drv/isa_ce_sm4.c
|
||
|
|
index 466b060..ccab8fb 100644
|
||
|
|
--- a/drv/isa_ce_sm4.c
|
||
|
|
+++ b/drv/isa_ce_sm4.c
|
||
|
|
@@ -128,6 +128,16 @@ static void sm4_cbc_decrypt(struct wd_cipher_msg *msg, const struct SM4_KEY *rke
|
||
|
|
sm4_v8_cbc_encrypt(msg->in, msg->out, msg->in_bytes, rkey_dec, msg->iv, SM4_DECRYPT);
|
||
|
|
}
|
||
|
|
|
||
|
|
+static void sm4_ecb_encrypt(struct wd_cipher_msg *msg, const struct SM4_KEY *rkey_enc)
|
||
|
|
+{
|
||
|
|
+ sm4_v8_ecb_encrypt(msg->in, msg->out, msg->in_bytes, rkey_enc, SM4_ENCRYPT);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+static void sm4_ecb_decrypt(struct wd_cipher_msg *msg, const struct SM4_KEY *rkey_dec)
|
||
|
|
+{
|
||
|
|
+ sm4_v8_ecb_encrypt(msg->in, msg->out, msg->in_bytes, rkey_dec, SM4_DECRYPT);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
void sm4_set_encrypt_key(const __u8 *userKey, struct SM4_KEY *key)
|
||
|
|
{
|
||
|
|
sm4_v8_set_encrypt_key(userKey, key);
|
||
|
|
@@ -254,6 +264,12 @@ static int isa_ce_cipher_send(struct wd_alg_driver *drv, handle_t ctx, void *wd_
|
||
|
|
sm4_set_decrypt_key(msg->key, &rkey);
|
||
|
|
|
||
|
|
switch (msg->mode) {
|
||
|
|
+ case WD_CIPHER_ECB:
|
||
|
|
+ if (msg->op_type == WD_CIPHER_ENCRYPTION)
|
||
|
|
+ sm4_ecb_encrypt(msg, &rkey);
|
||
|
|
+ else
|
||
|
|
+ sm4_ecb_decrypt(msg, &rkey);
|
||
|
|
+ break;
|
||
|
|
case WD_CIPHER_CBC:
|
||
|
|
if (msg->op_type == WD_CIPHER_ENCRYPTION)
|
||
|
|
sm4_cbc_encrypt(msg, &rkey);
|
||
|
|
@@ -317,6 +333,7 @@ static struct wd_alg_driver cipher_alg_driver[] = {
|
||
|
|
GEN_CE_ALG_DRIVER("ctr(sm4)", cipher),
|
||
|
|
GEN_CE_ALG_DRIVER("cfb(sm4)", cipher),
|
||
|
|
GEN_CE_ALG_DRIVER("xts(sm4)", cipher),
|
||
|
|
+ GEN_CE_ALG_DRIVER("ecb(sm4)", cipher),
|
||
|
|
};
|
||
|
|
|
||
|
|
static void __attribute__((constructor)) isa_ce_probe(void)
|
||
|
|
diff --git a/drv/isa_ce_sm4.h b/drv/isa_ce_sm4.h
|
||
|
|
index d92069f..d10b0af 100644
|
||
|
|
--- a/drv/isa_ce_sm4.h
|
||
|
|
+++ b/drv/isa_ce_sm4.h
|
||
|
|
@@ -28,6 +28,8 @@ void sm4_v8_set_decrypt_key(const unsigned char *userKey, struct SM4_KEY *key);
|
||
|
|
void sm4_v8_cbc_encrypt(const unsigned char *in, unsigned char *out,
|
||
|
|
size_t length, const struct SM4_KEY *key,
|
||
|
|
unsigned char *ivec, const int enc);
|
||
|
|
+void sm4_v8_ecb_encrypt(const unsigned char *in, unsigned char *out,
|
||
|
|
+ size_t length, const struct SM4_KEY *key, const int enc);
|
||
|
|
void sm4_v8_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
|
||
|
|
size_t len, const void *key, const unsigned char ivec[16]);
|
||
|
|
|
||
|
|
diff --git a/drv/isa_ce_sm4_armv8.S b/drv/isa_ce_sm4_armv8.S
|
||
|
|
index 342dfa5..7d84496 100644
|
||
|
|
--- a/drv/isa_ce_sm4_armv8.S
|
||
|
|
+++ b/drv/isa_ce_sm4_armv8.S
|
||
|
|
@@ -506,6 +506,269 @@ sm4_v8_cbc_encrypt:
|
||
|
|
ldp d8,d9,[sp],#16
|
||
|
|
ret
|
||
|
|
.size sm4_v8_cbc_encrypt,.-sm4_v8_cbc_encrypt
|
||
|
|
+.globl sm4_v8_ecb_encrypt
|
||
|
|
+.type sm4_v8_ecb_encrypt,%function
|
||
|
|
+.align 5
|
||
|
|
+sm4_v8_ecb_encrypt:
|
||
|
|
+ AARCH64_VALID_CALL_TARGET
|
||
|
|
+ ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x3],#64
|
||
|
|
+ ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x3]
|
||
|
|
+1:
|
||
|
|
+ cmp x2,#64
|
||
|
|
+ b.lt 1f
|
||
|
|
+ ld1 {v16.4s,v17.4s,v18.4s,v19.4s},[x0],#64
|
||
|
|
+ cmp x2,#128
|
||
|
|
+ b.lt 2f
|
||
|
|
+ ld1 {v20.4s,v21.4s,v22.4s,v23.4s},[x0],#64
|
||
|
|
+ // 8 blocks (x2 >= 128 bytes): v16-v19, then v20-v23, each take 8 x sm4e with round keys v0-v7
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v16.16b,v16.16b
|
||
|
|
+#endif
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v17.16b,v17.16b
|
||
|
|
+#endif
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v18.16b,v18.16b
|
||
|
|
+#endif
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v19.16b,v19.16b
|
||
|
|
+#endif
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v20.16b,v20.16b
|
||
|
|
+#endif
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v21.16b,v21.16b
|
||
|
|
+#endif
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v22.16b,v22.16b
|
||
|
|
+#endif
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v23.16b,v23.16b
|
||
|
|
+#endif
|
||
|
|
+ sm4e v16.4s,v0.4s;
|
||
|
|
+ sm4e v17.4s,v0.4s;
|
||
|
|
+ sm4e v18.4s,v0.4s;
|
||
|
|
+ sm4e v19.4s,v0.4s;
|
||
|
|
+
|
||
|
|
+ sm4e v16.4s,v1.4s;
|
||
|
|
+ sm4e v17.4s,v1.4s;
|
||
|
|
+ sm4e v18.4s,v1.4s;
|
||
|
|
+ sm4e v19.4s,v1.4s;
|
||
|
|
+
|
||
|
|
+ sm4e v16.4s,v2.4s;
|
||
|
|
+ sm4e v17.4s,v2.4s;
|
||
|
|
+ sm4e v18.4s,v2.4s;
|
||
|
|
+ sm4e v19.4s,v2.4s;
|
||
|
|
+
|
||
|
|
+ sm4e v16.4s,v3.4s;
|
||
|
|
+ sm4e v17.4s,v3.4s;
|
||
|
|
+ sm4e v18.4s,v3.4s;
|
||
|
|
+ sm4e v19.4s,v3.4s;
|
||
|
|
+
|
||
|
|
+ sm4e v16.4s,v4.4s;
|
||
|
|
+ sm4e v17.4s,v4.4s;
|
||
|
|
+ sm4e v18.4s,v4.4s;
|
||
|
|
+ sm4e v19.4s,v4.4s;
|
||
|
|
+
|
||
|
|
+ sm4e v16.4s,v5.4s;
|
||
|
|
+ sm4e v17.4s,v5.4s;
|
||
|
|
+ sm4e v18.4s,v5.4s;
|
||
|
|
+ sm4e v19.4s,v5.4s;
|
||
|
|
+
|
||
|
|
+ sm4e v16.4s,v6.4s;
|
||
|
|
+ sm4e v17.4s,v6.4s;
|
||
|
|
+ sm4e v18.4s,v6.4s;
|
||
|
|
+ sm4e v19.4s,v6.4s;
|
||
|
|
+
|
||
|
|
+ sm4e v16.4s,v7.4s;
|
||
|
|
+ rev64 v16.4S,v16.4S
|
||
|
|
+ sm4e v17.4s,v7.4s;
|
||
|
|
+ ext v16.16b,v16.16b,v16.16b,#8
|
||
|
|
+ rev64 v17.4S,v17.4S
|
||
|
|
+ sm4e v18.4s,v7.4s;
|
||
|
|
+ ext v17.16b,v17.16b,v17.16b,#8
|
||
|
|
+ rev64 v18.4S,v18.4S
|
||
|
|
+ sm4e v19.4s,v7.4s;
|
||
|
|
+ ext v18.16b,v18.16b,v18.16b,#8
|
||
|
|
+ rev64 v19.4S,v19.4S
|
||
|
|
+ ext v19.16b,v19.16b,v19.16b,#8
|
||
|
|
+ sm4e v20.4s,v0.4s;
|
||
|
|
+ sm4e v21.4s,v0.4s;
|
||
|
|
+ sm4e v22.4s,v0.4s;
|
||
|
|
+ sm4e v23.4s,v0.4s;
|
||
|
|
+
|
||
|
|
+ sm4e v20.4s,v1.4s;
|
||
|
|
+ sm4e v21.4s,v1.4s;
|
||
|
|
+ sm4e v22.4s,v1.4s;
|
||
|
|
+ sm4e v23.4s,v1.4s;
|
||
|
|
+
|
||
|
|
+ sm4e v20.4s,v2.4s;
|
||
|
|
+ sm4e v21.4s,v2.4s;
|
||
|
|
+ sm4e v22.4s,v2.4s;
|
||
|
|
+ sm4e v23.4s,v2.4s;
|
||
|
|
+
|
||
|
|
+ sm4e v20.4s,v3.4s;
|
||
|
|
+ sm4e v21.4s,v3.4s;
|
||
|
|
+ sm4e v22.4s,v3.4s;
|
||
|
|
+ sm4e v23.4s,v3.4s;
|
||
|
|
+
|
||
|
|
+ sm4e v20.4s,v4.4s;
|
||
|
|
+ sm4e v21.4s,v4.4s;
|
||
|
|
+ sm4e v22.4s,v4.4s;
|
||
|
|
+ sm4e v23.4s,v4.4s;
|
||
|
|
+
|
||
|
|
+ sm4e v20.4s,v5.4s;
|
||
|
|
+ sm4e v21.4s,v5.4s;
|
||
|
|
+ sm4e v22.4s,v5.4s;
|
||
|
|
+ sm4e v23.4s,v5.4s;
|
||
|
|
+
|
||
|
|
+ sm4e v20.4s,v6.4s;
|
||
|
|
+ sm4e v21.4s,v6.4s;
|
||
|
|
+ sm4e v22.4s,v6.4s;
|
||
|
|
+ sm4e v23.4s,v6.4s;
|
||
|
|
+
|
||
|
|
+ sm4e v20.4s,v7.4s;
|
||
|
|
+ rev64 v20.4S,v20.4S
|
||
|
|
+ sm4e v21.4s,v7.4s;
|
||
|
|
+ ext v20.16b,v20.16b,v20.16b,#8
|
||
|
|
+ rev64 v21.4S,v21.4S
|
||
|
|
+ sm4e v22.4s,v7.4s;
|
||
|
|
+ ext v21.16b,v21.16b,v21.16b,#8
|
||
|
|
+ rev64 v22.4S,v22.4S
|
||
|
|
+ sm4e v23.4s,v7.4s;
|
||
|
|
+ ext v22.16b,v22.16b,v22.16b,#8
|
||
|
|
+ rev64 v23.4S,v23.4S
|
||
|
|
+ ext v23.16b,v23.16b,v23.16b,#8
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v16.16b,v16.16b
|
||
|
|
+#endif
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v17.16b,v17.16b
|
||
|
|
+#endif
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v18.16b,v18.16b
|
||
|
|
+#endif
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v19.16b,v19.16b
|
||
|
|
+#endif
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v20.16b,v20.16b
|
||
|
|
+#endif
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v21.16b,v21.16b
|
||
|
|
+#endif
|
||
|
|
+ st1 {v16.4s,v17.4s,v18.4s,v19.4s},[x1],#64
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v22.16b,v22.16b
|
||
|
|
+#endif
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v23.16b,v23.16b
|
||
|
|
+#endif
|
||
|
|
+ st1 {v20.4s,v21.4s,v22.4s,v23.4s},[x1],#64
|
||
|
|
+ subs x2,x2,#128
|
||
|
|
+ b.gt 1b
|
||
|
|
+ ret
|
||
|
|
+ // 4 blocks (64 <= x2 < 128 bytes): v16-v19 were already loaded before the branch to 2:
|
||
|
|
+2:
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v16.16b,v16.16b
|
||
|
|
+#endif
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v17.16b,v17.16b
|
||
|
|
+#endif
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v18.16b,v18.16b
|
||
|
|
+#endif
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v19.16b,v19.16b
|
||
|
|
+#endif
|
||
|
|
+ sm4e v16.4s,v0.4s;
|
||
|
|
+ sm4e v17.4s,v0.4s;
|
||
|
|
+ sm4e v18.4s,v0.4s;
|
||
|
|
+ sm4e v19.4s,v0.4s;
|
||
|
|
+
|
||
|
|
+ sm4e v16.4s,v1.4s;
|
||
|
|
+ sm4e v17.4s,v1.4s;
|
||
|
|
+ sm4e v18.4s,v1.4s;
|
||
|
|
+ sm4e v19.4s,v1.4s;
|
||
|
|
+
|
||
|
|
+ sm4e v16.4s,v2.4s;
|
||
|
|
+ sm4e v17.4s,v2.4s;
|
||
|
|
+ sm4e v18.4s,v2.4s;
|
||
|
|
+ sm4e v19.4s,v2.4s;
|
||
|
|
+
|
||
|
|
+ sm4e v16.4s,v3.4s;
|
||
|
|
+ sm4e v17.4s,v3.4s;
|
||
|
|
+ sm4e v18.4s,v3.4s;
|
||
|
|
+ sm4e v19.4s,v3.4s;
|
||
|
|
+
|
||
|
|
+ sm4e v16.4s,v4.4s;
|
||
|
|
+ sm4e v17.4s,v4.4s;
|
||
|
|
+ sm4e v18.4s,v4.4s;
|
||
|
|
+ sm4e v19.4s,v4.4s;
|
||
|
|
+
|
||
|
|
+ sm4e v16.4s,v5.4s;
|
||
|
|
+ sm4e v17.4s,v5.4s;
|
||
|
|
+ sm4e v18.4s,v5.4s;
|
||
|
|
+ sm4e v19.4s,v5.4s;
|
||
|
|
+
|
||
|
|
+ sm4e v16.4s,v6.4s;
|
||
|
|
+ sm4e v17.4s,v6.4s;
|
||
|
|
+ sm4e v18.4s,v6.4s;
|
||
|
|
+ sm4e v19.4s,v6.4s;
|
||
|
|
+
|
||
|
|
+ sm4e v16.4s,v7.4s;
|
||
|
|
+ rev64 v16.4S,v16.4S
|
||
|
|
+ sm4e v17.4s,v7.4s;
|
||
|
|
+ ext v16.16b,v16.16b,v16.16b,#8
|
||
|
|
+ rev64 v17.4S,v17.4S
|
||
|
|
+ sm4e v18.4s,v7.4s;
|
||
|
|
+ ext v17.16b,v17.16b,v17.16b,#8
|
||
|
|
+ rev64 v18.4S,v18.4S
|
||
|
|
+ sm4e v19.4s,v7.4s;
|
||
|
|
+ ext v18.16b,v18.16b,v18.16b,#8
|
||
|
|
+ rev64 v19.4S,v19.4S
|
||
|
|
+ ext v19.16b,v19.16b,v19.16b,#8
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v16.16b,v16.16b
|
||
|
|
+#endif
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v17.16b,v17.16b
|
||
|
|
+#endif
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v18.16b,v18.16b
|
||
|
|
+#endif
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v19.16b,v19.16b
|
||
|
|
+#endif
|
||
|
|
+ st1 {v16.4s,v17.4s,v18.4s,v19.4s},[x1],#64
|
||
|
|
+ subs x2,x2,#64
|
||
|
|
+ b.gt 1b
|
||
|
|
+1:
|
||
|
|
+ subs x2,x2,#16
|
||
|
|
+ b.lt 1f
|
||
|
|
+ ld1 {v16.4s},[x0],#16
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v16.16b,v16.16b
|
||
|
|
+#endif
|
||
|
|
+ sm4e v16.4s,v0.4s;
|
||
|
|
+ sm4e v16.4s,v1.4s;
|
||
|
|
+ sm4e v16.4s,v2.4s;
|
||
|
|
+ sm4e v16.4s,v3.4s;
|
||
|
|
+ sm4e v16.4s,v4.4s;
|
||
|
|
+ sm4e v16.4s,v5.4s;
|
||
|
|
+ sm4e v16.4s,v6.4s;
|
||
|
|
+ sm4e v16.4s,v7.4s;
|
||
|
|
+ rev64 v16.4S,v16.4S
|
||
|
|
+ ext v16.16b,v16.16b,v16.16b,#8
|
||
|
|
+#ifndef __ARMEB__
|
||
|
|
+ rev32 v16.16b,v16.16b
|
||
|
|
+#endif
|
||
|
|
+ st1 {v16.4s},[x1],#16
|
||
|
|
+ b.ne 1b
|
||
|
|
+1:
|
||
|
|
+ ret
|
||
|
|
+.size sm4_v8_ecb_encrypt,.-sm4_v8_ecb_encrypt
|
||
|
|
.globl sm4_v8_ctr32_encrypt_blocks
|
||
|
|
.type sm4_v8_ctr32_encrypt_blocks,%function
|
||
|
|
.align 5
|
||
|
|
--
|
||
|
|
2.25.1
|
||
|
|
|