1889 lines
55 KiB
Diff
1889 lines
55 KiB
Diff
From da5f058d30f6d7eb28b4afbe27633d7664ba0961 Mon Sep 17 00:00:00 2001
|
|
From: Zhiqi Song <songzhiqi1@huawei.com>
|
|
Date: Mon, 11 Mar 2024 18:07:22 +0800
|
|
Subject: [PATCH 13/44] uadk/isa-ce: support sm3 ce instruction
|
|
|
|
Support the sm3 CE instruction in sync mode. Users can use the CE
|
|
instruction to accelerate sm3 sync tasks through the init2
|
|
related functions.
|
|
|
|
This patch also includes:
|
|
1. Add compile parameter and related file to support
|
|
isa-ce library.
|
|
2. Check whether the platform supports the CE instruction
|
|
in alg driver register process.
|
|
3. Allow the HW driver and the INSTR driver of the same alg to
|
|
be requested at the same time.
|
|
4. Support sm3 ce block mode and stream mode for sm3-normal
|
|
and hmac-sm3.
|
|
|
|
Signed-off-by: Zhiqi Song <songzhiqi1@huawei.com>
|
|
---
|
|
Makefile.am | 15 +-
|
|
configure.ac | 3 +
|
|
drv/isa_ce_sm3.c | 401 ++++++++++++++++++++
|
|
drv/isa_ce_sm3.h | 86 +++++
|
|
drv/isa_ce_sm3_armv8.S | 765 ++++++++++++++++++++++++++++++++++++++
|
|
include/drv/arm_arch_ce.h | 199 ++++++++++
|
|
include/wd_alg.h | 43 +++
|
|
wd_alg.c | 32 +-
|
|
wd_digest.c | 2 +-
|
|
wd_sched.c | 2 +-
|
|
wd_util.c | 87 ++++-
|
|
11 files changed, 1616 insertions(+), 19 deletions(-)
|
|
create mode 100644 drv/isa_ce_sm3.c
|
|
create mode 100644 drv/isa_ce_sm3.h
|
|
create mode 100644 drv/isa_ce_sm3_armv8.S
|
|
create mode 100644 include/drv/arm_arch_ce.h
|
|
|
|
diff --git a/Makefile.am b/Makefile.am
|
|
index 25853eb..19eab30 100644
|
|
--- a/Makefile.am
|
|
+++ b/Makefile.am
|
|
@@ -43,7 +43,8 @@ nobase_pkginclude_HEADERS = v1/wd.h v1/wd_cipher.h v1/wd_aead.h v1/uacce.h v1/wd
|
|
lib_LTLIBRARIES=libwd.la libwd_comp.la libwd_crypto.la
|
|
|
|
uadk_driversdir=$(libdir)/uadk
|
|
-uadk_drivers_LTLIBRARIES=libhisi_sec.la libhisi_hpre.la libhisi_zip.la
|
|
+uadk_drivers_LTLIBRARIES=libhisi_sec.la libhisi_hpre.la libhisi_zip.la \
|
|
+ libisa_ce.la
|
|
|
|
libwd_la_SOURCES=wd.c wd_mempool.c wd.h wd_alg.c wd_alg.h \
|
|
v1/wd.c v1/wd.h v1/wd_adapter.c v1/wd_adapter.h \
|
|
@@ -87,6 +88,10 @@ libhisi_sec_la_SOURCES=drv/hisi_sec.c drv/hisi_qm_udrv.c \
|
|
|
|
libhisi_hpre_la_SOURCES=drv/hisi_hpre.c drv/hisi_qm_udrv.c \
|
|
hisi_qm_udrv.h
|
|
+
|
|
+libisa_ce_la_SOURCES=drv/isa_ce_sm3.c drv/isa_ce_sm3_armv8.S arm_arch_ce.h \
|
|
+ drv/isa_ce_sm3.h
|
|
+
|
|
if WD_STATIC_DRV
|
|
AM_CFLAGS += -DWD_STATIC_DRV -fPIC
|
|
AM_CFLAGS += -DWD_NO_LOG
|
|
@@ -106,6 +111,10 @@ libhisi_sec_la_DEPENDENCIES = libwd.la libwd_crypto.la
|
|
|
|
libhisi_hpre_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_crypto_la_OBJECTS)
|
|
libhisi_hpre_la_DEPENDENCIES = libwd.la libwd_crypto.la
|
|
+
|
|
+libisa_ce_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_crypto_la_OBJECTS)
|
|
+libisa_ce_la_DEPENDENCIES = libwd.la libwd_crypto.la
|
|
+
|
|
else
|
|
UADK_WD_SYMBOL= -Wl,--version-script,$(top_srcdir)/libwd.map
|
|
UADK_CRYPTO_SYMBOL= -Wl,--version-script,$(top_srcdir)/libwd_crypto.map
|
|
@@ -134,6 +143,10 @@ libhisi_sec_la_DEPENDENCIES= libwd.la libwd_crypto.la
|
|
libhisi_hpre_la_LIBADD= -lwd -lwd_crypto
|
|
libhisi_hpre_la_LDFLAGS=$(UADK_VERSION)
|
|
libhisi_hpre_la_DEPENDENCIES= libwd.la libwd_crypto.la
|
|
+
|
|
+libisa_ce_la_LIBADD= -lwd -lwd_crypto
|
|
+libisa_ce_la_LDFLAGS=$(UADK_VERSION)
|
|
+libisa_ce_la_DEPENDENCIES= libwd.la libwd_crypto.la
|
|
endif # WD_STATIC_DRV
|
|
|
|
pkgconfigdir = $(libdir)/pkgconfig
|
|
diff --git a/configure.ac b/configure.ac
|
|
index b198417..4ed111e 100644
|
|
--- a/configure.ac
|
|
+++ b/configure.ac
|
|
@@ -21,6 +21,9 @@ LT_INIT
|
|
AC_SUBST([hardcode_into_libs], [no])
|
|
AM_PROG_CC_C_O
|
|
|
|
+# Support assembler
|
|
+AM_PROG_AS
|
|
+
|
|
AC_ARG_ENABLE([debug-log],
|
|
AS_HELP_STRING([--enable-debug-log], [enable debug logging globally]),
|
|
[ AS_IF([test "x$enable_debug_log" = "xyes"],
|
|
diff --git a/drv/isa_ce_sm3.c b/drv/isa_ce_sm3.c
|
|
new file mode 100644
|
|
index 0000000..f16bdd3
|
|
--- /dev/null
|
|
+++ b/drv/isa_ce_sm3.c
|
|
@@ -0,0 +1,401 @@
|
|
+// SPDX-License-Identifier: Apache-2.0
|
|
+/*
|
|
+ * Copyright 2011-2022 The OpenSSL Project Authors. All Rights Reserved.
|
|
+ *
|
|
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
+ * this file except in compliance with the License. You can obtain a copy
|
|
+ * in the file LICENSE in the source distribution or at
|
|
+ * https://www.openssl.org/source/license.html
|
|
+ */
|
|
+/*
|
|
+ * Copyright 2023 Huawei Technologies Co.,Ltd. All rights reserved.
|
|
+ */
|
|
+
|
|
+#include <stdlib.h>
|
|
+#include <sys/auxv.h>
|
|
+#include <pthread.h>
|
|
+#include "drv/isa_ce_sm3.h"
|
|
+#include "drv/wd_digest_drv.h"
|
|
+#include "wd_digest.h"
|
|
+#include "wd_util.h"
|
|
+
|
|
+typedef void (sm3_ce_block_fn)(__u32 word_reg[SM3_STATE_WORDS],
|
|
+ const unsigned char *src, size_t blocks);
|
|
+
|
|
+static int sm3_ce_drv_init(struct wd_alg_driver *drv, void *conf);
|
|
+static void sm3_ce_drv_exit(struct wd_alg_driver *drv);
|
|
+static int sm3_ce_drv_send(struct wd_alg_driver *drv, handle_t ctx, void *digest_msg);
|
|
+static int sm3_ce_drv_recv(struct wd_alg_driver *drv, handle_t ctx, void *digest_msg);
|
|
+static int sm3_ce_get_usage(void *param);
|
|
+
|
|
+static struct wd_alg_driver sm3_ce_alg_driver = {
|
|
+ .drv_name = "isa_ce_sm3",
|
|
+ .alg_name = "sm3",
|
|
+ .calc_type = UADK_ALG_CE_INSTR,
|
|
+ .priority = 200,
|
|
+ .queue_num = 1,
|
|
+ .op_type_num = 1,
|
|
+ .fallback = 0,
|
|
+ .init = sm3_ce_drv_init,
|
|
+ .exit = sm3_ce_drv_exit,
|
|
+ .send = sm3_ce_drv_send,
|
|
+ .recv = sm3_ce_drv_recv,
|
|
+ .get_usage = sm3_ce_get_usage,
|
|
+};
|
|
+
|
|
+static void __attribute__((constructor)) sm3_ce_probe(void)
|
|
+{
|
|
+ int ret;
|
|
+
|
|
+ WD_INFO("Info: register SM3 CE alg driver!\n");
|
|
+ ret = wd_alg_driver_register(&sm3_ce_alg_driver);
|
|
+ if (ret && ret != -WD_ENODEV)
|
|
+ WD_ERR("Error: register SM3 CE failed!\n");
|
|
+}
|
|
+
|
|
+static void __attribute__((destructor)) sm3_ce_remove(void)
|
|
+{
|
|
+ wd_alg_driver_unregister(&sm3_ce_alg_driver);
|
|
+}
|
|
+
|
|
+static int sm3_ce_get_usage(void *param)
|
|
+{
|
|
+ return WD_SUCCESS;
|
|
+}
|
|
+
|
|
+static inline void sm3_ce_init(struct sm3_ce_ctx *sctx)
|
|
+{
|
|
+ sctx->word_reg[0] = SM3_IVA;
|
|
+ sctx->word_reg[1] = SM3_IVB;
|
|
+ sctx->word_reg[2] = SM3_IVC;
|
|
+ sctx->word_reg[3] = SM3_IVD;
|
|
+ sctx->word_reg[4] = SM3_IVE;
|
|
+ sctx->word_reg[5] = SM3_IVF;
|
|
+ sctx->word_reg[6] = SM3_IVG;
|
|
+ sctx->word_reg[7] = SM3_IVH;
|
|
+}
|
|
+
|
|
+static void trans_output_result(__u8 *out_digest, __u32 *word_reg)
|
|
+{
|
|
+ size_t i;
|
|
+
|
|
+ for (i = 0; i < SM3_STATE_WORDS; i++)
|
|
+ PUTU32_TO_U8(out_digest + i * WORD_TO_CHAR_OFFSET, word_reg[i]);
|
|
+}
|
|
+
|
|
+static void sm3_ce_init_ex(struct sm3_ce_ctx *sctx, __u8 *iv, __u16 iv_bytes)
|
|
+{
|
|
+ size_t i;
|
|
+
|
|
+ if (iv_bytes != SM3_DIGEST_SIZE) {
|
|
+ WD_ERR("invalid iv size: %u\n", iv_bytes);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ for (i = 0; i < SM3_STATE_WORDS; i++)
|
|
+ PUTU8_TO_U32(sctx->word_reg[i], iv + i * WORD_TO_CHAR_OFFSET);
|
|
+}
|
|
+
|
|
+static void sm3_ce_update(struct sm3_ce_ctx *sctx, const __u8 *data,
|
|
+ size_t data_len, sm3_ce_block_fn *block_fn)
|
|
+{
|
|
+ size_t remain_data_len, blk_num;
|
|
+
|
|
+ /* Get the number of bytes already buffered in the current block */
|
|
+ sctx->num &= (SM3_BLOCK_SIZE - 1);
|
|
+
|
|
+ if (sctx->num) {
|
|
+ remain_data_len = SM3_BLOCK_SIZE - sctx->num;
|
|
+ /* If data_len is not enough to fill the block, buffer it and leave it to final */
|
|
+ if (data_len < remain_data_len) {
|
|
+ memcpy(sctx->block + sctx->num, data, data_len);
|
|
+ sctx->num += data_len;
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ memcpy(sctx->block + sctx->num, data, remain_data_len);
|
|
+ block_fn(sctx->word_reg, sctx->block, 1);
|
|
+ sctx->nblocks++;
|
|
+ data += remain_data_len;
|
|
+ data_len -= remain_data_len;
|
|
+ }
|
|
+
|
|
+ /* Group the filled msg by 512-bits (64-bytes) */
|
|
+ blk_num = data_len / SM3_BLOCK_SIZE;
|
|
+ if (blk_num) {
|
|
+ block_fn(sctx->word_reg, data, blk_num);
|
|
+ sctx->nblocks += blk_num;
|
|
+ data += SM3_BLOCK_SIZE * blk_num;
|
|
+ data_len -= SM3_BLOCK_SIZE * blk_num;
|
|
+ }
|
|
+
|
|
+ sctx->num = data_len;
|
|
+ if (data_len)
|
|
+ memcpy(sctx->block, data, data_len);
|
|
+}
|
|
+
|
|
+static void sm3_ce_final(struct sm3_ce_ctx *sctx, __u8 *md,
|
|
+ sm3_ce_block_fn *block_fn)
|
|
+{
|
|
+ size_t i, offset1, offset2;
|
|
+ __u64 nh, nl;
|
|
+
|
|
+ sctx->num &= (SM3_BLOCK_SIZE - 1);
|
|
+ sctx->block[sctx->num] = SM3_PADDING_BYTE;
|
|
+
|
|
+ if (sctx->num <= SM3_BLOCK_SIZE - BIT_TO_BLOCK_OFFSET) {
|
|
+ memset(sctx->block + sctx->num + 1, 0, SM3_BLOCK_SIZE - sctx->num - 9);
|
|
+ } else {
|
|
+ memset(sctx->block + sctx->num + 1, 0, SM3_BLOCK_SIZE - sctx->num - 1);
|
|
+ block_fn(sctx->word_reg, sctx->block, 1);
|
|
+ memset(sctx->block, 0, SM3_BLOCK_SIZE - 8);
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Put the length of the message in bits into the last
|
|
+ * 64 bits (the final two 32-bit words) of the block.
|
|
+ */
|
|
+ offset2 = SM3_BLOCK_SIZE - WORD_TO_CHAR_OFFSET * 2;
|
|
+ offset1 = SM3_BLOCK_SIZE - WORD_TO_CHAR_OFFSET;
|
|
+ nh = sctx->nblocks >> NH_OFFSET;
|
|
+ nl = (sctx->nblocks << BIT_TO_BLOCK_OFFSET) + (sctx->num << BIT_TO_BYTE_OFFSET);
|
|
+ PUTU32_TO_U8(sctx->block + offset2 , nh);
|
|
+ PUTU32_TO_U8(sctx->block + offset1, nl);
|
|
+
|
|
+ block_fn(sctx->word_reg, sctx->block, 1);
|
|
+ for (i = 0; i < SM3_STATE_WORDS; i++)
|
|
+ PUTU32_TO_U8(md + i * WORD_TO_CHAR_OFFSET, sctx->word_reg[i]);
|
|
+}
|
|
+
|
|
+static int do_sm3_ce(struct wd_digest_msg *msg, __u8 *out_digest)
|
|
+{
|
|
+ enum hash_block_type block_type;
|
|
+ struct sm3_ce_ctx sctx = {0};
|
|
+ size_t data_len, iv_len;
|
|
+ __u8 *data, *iv;
|
|
+
|
|
+ block_type = get_hash_block_type(msg);
|
|
+ data_len = msg->in_bytes;
|
|
+ data = msg->in;
|
|
+ iv_len = SM3_DIGEST_SIZE;
|
|
+ /* Use last output as the iv in current cycle */
|
|
+ iv = msg->out;
|
|
+
|
|
+ switch(block_type) {
|
|
+ case HASH_SINGLE_BLOCK:
|
|
+ sm3_ce_init(&sctx);
|
|
+ sm3_ce_update(&sctx, data, data_len, sm3_ce_block_compress);
|
|
+ sm3_ce_final(&sctx, out_digest, sm3_ce_block_compress);
|
|
+ break;
|
|
+ case HASH_FRIST_BLOCK:
|
|
+ sm3_ce_init(&sctx);
|
|
+ sm3_ce_update(&sctx, data, data_len, sm3_ce_block_compress);
|
|
+ trans_output_result(out_digest, sctx.word_reg);
|
|
+ break;
|
|
+ case HASH_MIDDLE_BLOCK:
|
|
+ sm3_ce_init_ex(&sctx, iv, iv_len);
|
|
+ sm3_ce_update(&sctx, data, data_len, sm3_ce_block_compress);
|
|
+ /* Transform the middle result without final padding */
|
|
+ trans_output_result(out_digest, sctx.word_reg);
|
|
+ break;
|
|
+ case HASH_END_BLOCK:
|
|
+ sm3_ce_init_ex(&sctx, iv, iv_len);
|
|
+ sm3_ce_update(&sctx, data, data_len, sm3_ce_block_compress);
|
|
+ /* Put the whole message length in last 64-bits */
|
|
+ sctx.nblocks = msg->long_data_len / SM3_BLOCK_SIZE;
|
|
+ sm3_ce_final(&sctx, out_digest, sm3_ce_block_compress);
|
|
+ break;
|
|
+ default:
|
|
+ WD_ERR("Invalid block type!\n");
|
|
+ return -WD_EINVAL;
|
|
+ }
|
|
+
|
|
+ if (msg->out_bytes < SM3_DIGEST_SIZE)
|
|
+ memcpy(msg->out, out_digest, msg->out_bytes);
|
|
+ else
|
|
+ memcpy(msg->out, out_digest, SM3_DIGEST_SIZE);
|
|
+
|
|
+ memset(&sctx, 0, sizeof(struct sm3_ce_ctx));
|
|
+
|
|
+ return WD_SUCCESS;
|
|
+}
|
|
+
|
|
+static void sm3_hmac_key_padding(struct hmac_sm3_ctx *hctx,
|
|
+ const __u8 *key, size_t key_len)
|
|
+{
|
|
+ size_t i;
|
|
+
|
|
+ if (key_len <= SM3_BLOCK_SIZE) {
|
|
+ memcpy(hctx->key, key, key_len);
|
|
+ memset(hctx->key + key_len, 0, SM3_BLOCK_SIZE - key_len);
|
|
+ } else {
|
|
+ sm3_ce_init(&hctx->sctx);
|
|
+ sm3_ce_update(&hctx->sctx, key, key_len, sm3_ce_block_compress);
|
|
+ sm3_ce_final(&hctx->sctx, hctx->key, sm3_ce_block_compress);
|
|
+ /* Pad key to SM3_BLOCK_SIZE after hash */
|
|
+ memset(hctx->key + SM3_DIGEST_SIZE, 0,
|
|
+ SM3_BLOCK_SIZE - SM3_DIGEST_SIZE);
|
|
+ }
|
|
+
|
|
+ for (i = 0; i < SM3_BLOCK_SIZE; i++) {
|
|
+ hctx->key[i] ^= IPAD_DATA;
|
|
+ }
|
|
+}
|
|
+
|
|
+static void sm3_ce_hmac_init(struct hmac_sm3_ctx *hctx, const __u8 *key, size_t key_len)
|
|
+{
|
|
+ sm3_hmac_key_padding(hctx, key, key_len);
|
|
+
|
|
+ /* Ipadded key is the first block to hash in first cycle */
|
|
+ sm3_ce_init(&hctx->sctx);
|
|
+ sm3_ce_update(&hctx->sctx, hctx->key, SM3_BLOCK_SIZE, sm3_ce_block_compress);
|
|
+}
|
|
+
|
|
+static void sm3_ce_hmac_update(struct hmac_sm3_ctx *hctx, const __u8 *data, size_t data_len)
|
|
+{
|
|
+ sm3_ce_update(&hctx->sctx, data, data_len, sm3_ce_block_compress);
|
|
+}
|
|
+
|
|
+static void sm3_ce_hmac_final(struct hmac_sm3_ctx *hctx, __u8 *out_hmac)
|
|
+{
|
|
+ __u8 digest[SM3_DIGEST_SIZE] = {0};
|
|
+ size_t i;
|
|
+
|
|
+ for (i = 0; i < SM3_BLOCK_SIZE; i++) {
|
|
+ hctx->key[i] ^= (IPAD_DATA ^ OPAD_DATA);
|
|
+ }
|
|
+
|
|
+ /* Compute the last data from update process */
|
|
+ sm3_ce_final(&hctx->sctx, digest, sm3_ce_block_compress);
|
|
+
|
|
+ /* Opadded key is the first block to hash in second cycle */
|
|
+ memset(&hctx->sctx, 0, sizeof(struct sm3_ce_ctx));
|
|
+ sm3_ce_init(&hctx->sctx);
|
|
+ sm3_ce_update(&hctx->sctx, hctx->key, SM3_BLOCK_SIZE, sm3_ce_block_compress);
|
|
+
|
|
+ /* Hash the digest produced by the first cycle */
|
|
+ sm3_ce_update(&hctx->sctx, digest, SM3_DIGEST_SIZE, sm3_ce_block_compress);
|
|
+ sm3_ce_final(&hctx->sctx, out_hmac, sm3_ce_block_compress);
|
|
+}
|
|
+
|
|
+static int do_hmac_sm3_ce(struct wd_digest_msg *msg, __u8 *out_hmac)
|
|
+{
|
|
+ size_t data_len, key_len, iv_len;
|
|
+ enum hash_block_type block_type;
|
|
+ struct hmac_sm3_ctx hctx = {0};
|
|
+ __u8 *data, *key, *iv;
|
|
+
|
|
+ data_len = msg->in_bytes;
|
|
+ data = msg->in;
|
|
+ key = msg->key;
|
|
+ key_len = msg->key_bytes;
|
|
+ iv_len = SM3_DIGEST_SIZE;
|
|
+ /* Use last output as the iv in current cycle */
|
|
+ iv = msg->out;
|
|
+
|
|
+ if (!key_len) {
|
|
+ WD_ERR("invalid hmac key_len is 0!\n");
|
|
+ return -WD_EINVAL;
|
|
+ }
|
|
+
|
|
+ block_type = get_hash_block_type(msg);
|
|
+ switch(block_type) {
|
|
+ case HASH_SINGLE_BLOCK:
|
|
+ sm3_ce_hmac_init(&hctx, key, key_len);
|
|
+ sm3_ce_hmac_update(&hctx, data, data_len);
|
|
+ sm3_ce_hmac_final(&hctx, out_hmac);
|
|
+ break;
|
|
+ case HASH_FRIST_BLOCK:
|
|
+ sm3_ce_hmac_init(&hctx, key, key_len);
|
|
+ sm3_ce_hmac_update(&hctx, data, data_len);
|
|
+ trans_output_result(out_hmac, hctx.sctx.word_reg);
|
|
+ break;
|
|
+ case HASH_MIDDLE_BLOCK:
|
|
+ sm3_ce_init_ex(&(hctx.sctx), iv, iv_len);
|
|
+ sm3_ce_hmac_update(&hctx, data, data_len);
|
|
+ trans_output_result(out_hmac, hctx.sctx.word_reg);
|
|
+ break;
|
|
+ case HASH_END_BLOCK:
|
|
+ sm3_hmac_key_padding(&hctx, key, key_len);
|
|
+ sm3_ce_init_ex(&(hctx.sctx), iv, iv_len);
|
|
+ sm3_ce_hmac_update(&hctx, data, data_len);
|
|
+ hctx.sctx.nblocks = msg->long_data_len / SM3_BLOCK_SIZE + KEY_BLOCK_NUM;
|
|
+ sm3_ce_hmac_final(&hctx, out_hmac);
|
|
+ break;
|
|
+ default:
|
|
+ WD_ERR("Invalid block type!\n");
|
|
+ return -WD_EINVAL;
|
|
+ }
|
|
+
|
|
+ if (msg->out_bytes < SM3_DIGEST_SIZE)
|
|
+ memcpy(msg->out, out_hmac, msg->out_bytes);
|
|
+ else
|
|
+ memcpy(msg->out, out_hmac, SM3_DIGEST_SIZE);
|
|
+
|
|
+ memset(&hctx, 0, sizeof(struct hmac_sm3_ctx));
|
|
+
|
|
+ return WD_SUCCESS;
|
|
+}
|
|
+
|
|
+static int sm3_ce_drv_send(struct wd_alg_driver *drv, handle_t ctx, void *digest_msg)
|
|
+{
|
|
+ struct wd_digest_msg *msg = (struct wd_digest_msg *)digest_msg;
|
|
+ __u8 digest[SM3_DIGEST_SIZE] = {0};
|
|
+ int ret;
|
|
+
|
|
+ if (!msg) {
|
|
+ WD_ERR("invalid: digest_msg is NULL!\n");
|
|
+ return -WD_EINVAL;
|
|
+ }
|
|
+
|
|
+ if (msg->data_fmt == WD_SGL_BUF) {
|
|
+ WD_ERR("invalid: SM3 CE driver do not support sgl data format!\n");
|
|
+ return -WD_EINVAL;
|
|
+ }
|
|
+
|
|
+ if (msg->mode == WD_DIGEST_NORMAL) {
|
|
+ ret = do_sm3_ce(msg, digest);
|
|
+ } else if (msg->mode == WD_DIGEST_HMAC) {
|
|
+ ret = do_hmac_sm3_ce(msg, digest);
|
|
+ } else {
|
|
+ WD_ERR("invalid digest mode!\n");
|
|
+ ret = -WD_EINVAL;
|
|
+ }
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static int sm3_ce_drv_recv(struct wd_alg_driver *drv, handle_t ctx, void *digest_msg)
|
|
+{
|
|
+ return WD_SUCCESS;
|
|
+}
|
|
+
|
|
+static int sm3_ce_drv_init(struct wd_alg_driver *drv, void *conf)
|
|
+{
|
|
+ struct wd_ctx_config_internal *config = (struct wd_ctx_config_internal *)conf;
|
|
+ struct sm3_ce_drv_ctx *sctx = (struct sm3_ce_drv_ctx *)drv->priv;
|
|
+
|
|
+ config->epoll_en = false;
|
|
+
|
|
+ /* return if already inited */
|
|
+ if (sctx)
|
|
+ return WD_SUCCESS;
|
|
+ sctx = malloc(sizeof(struct sm3_ce_drv_ctx));
|
|
+ if (!sctx)
|
|
+ return -WD_EINVAL;
|
|
+
|
|
+ memcpy(&sctx->config, config, sizeof(struct wd_ctx_config_internal));
|
|
+
|
|
+ return WD_SUCCESS;
|
|
+}
|
|
+
|
|
+static void sm3_ce_drv_exit(struct wd_alg_driver *drv)
|
|
+{
|
|
+ struct sm3_ce_drv_ctx *sctx = (struct sm3_ce_drv_ctx *)drv->priv;
|
|
+
|
|
+ if (!sctx)
|
|
+ return;
|
|
+
|
|
+ free(sctx);
|
|
+ drv->priv = NULL;
|
|
+}
|
|
diff --git a/drv/isa_ce_sm3.h b/drv/isa_ce_sm3.h
|
|
new file mode 100644
|
|
index 0000000..13edb0a
|
|
--- /dev/null
|
|
+++ b/drv/isa_ce_sm3.h
|
|
@@ -0,0 +1,86 @@
|
|
+/* SPDX-License-Identifier: Apache-2.0 */
|
|
+/* Copyright 2020-2021 Huawei Technologies Co.,Ltd. All rights reserved. */
|
|
+#ifndef __ISA_CE_SM3_H
|
|
+#define __ISA_CE_SM3_H
|
|
+
|
|
+#include "wd_alg_common.h"
|
|
+
|
|
+#ifdef __cplusplus
|
|
+extern "C" {
|
|
+#endif
|
|
+
|
|
+#define SM3_DIGEST_SIZE 32
|
|
+#define SM3_BLOCK_SIZE 64
|
|
+#define SM3_STATE_WORDS 8
|
|
+#define HMAC_BLOCK_SIZE 64
|
|
+#define WORD_TO_CHAR_OFFSET 4
|
|
+#define SM3_PADDING_BYTE 0x80
|
|
+#define NH_OFFSET 23
|
|
+#define BIT_TO_BLOCK_OFFSET 9
|
|
+#define BIT_TO_BYTE_OFFSET 3
|
|
+#define IPAD_DATA 0x36
|
|
+#define OPAD_DATA 0x5c
|
|
+#define KEY_BLOCK_NUM 1
|
|
+
|
|
+#define SM3_IVA 0x7380166f
|
|
+#define SM3_IVB 0x4914b2b9
|
|
+#define SM3_IVC 0x172442d7
|
|
+#define SM3_IVD 0xda8a0600
|
|
+#define SM3_IVE 0xa96f30bc
|
|
+#define SM3_IVF 0x163138aa
|
|
+#define SM3_IVG 0xe38dee4d
|
|
+#define SM3_IVH 0xb0fb0e4e
|
|
+
|
|
+#define PUTU32_TO_U8(dst, src) \
|
|
+ ((dst)[0] = (__u8)((src) >> 24), \
|
|
+ (dst)[1] = (__u8)((src) >> 16), \
|
|
+ (dst)[2] = (__u8)((src) >> 8), \
|
|
+ (dst)[3] = (__u8)(src))
|
|
+
|
|
+#define PUTU8_TO_U32(dst, src) \
|
|
+ ((dst) = (((__u32)(src)[0]) << 24) + \
|
|
+ (((__u32)(src)[1]) << 16) + \
|
|
+ (((__u32)(src)[2]) << 8) + \
|
|
+ ((__u32)(src)[3]))
|
|
+
|
|
+struct sm3_ce_ctx {
|
|
+ /*
|
|
+ * Use an array to represent the eight 32-bits word registers,
|
|
+ * SM3_IVA, SM3_IVB, ..., SM3_IVH, save IV and the final digest.
|
|
+ */
|
|
+ __u32 word_reg[SM3_STATE_WORDS];
|
|
+ /*
|
|
+ * The length (in bits) of all the msg fragments, the length of the
|
|
+ * whole msg should be less than 2^64 bits, a msg block is 512 bits,
|
|
+ * make a 64-bits number in two parts, low 32-bits - 'Nl' and
|
|
+ * high 32-bits - 'Nh'.
|
|
+ */
|
|
+ __u64 nblocks;
|
|
+ /*
|
|
+ * Message block, a msg block is 512-bits, use sixteen __u32 type
|
|
+ * element to store it, used in B(i) = W0||W1||W2||...||W15.
|
|
+ * Use a __u8 array to replace the 32-bit array.
|
|
+ */
|
|
+ __u8 block[SM3_BLOCK_SIZE];
|
|
+ /* The number of msg bytes buffered in block that are not yet compressed. */
|
|
+ size_t num;
|
|
+};
|
|
+
|
|
+struct hmac_sm3_ctx {
|
|
+ struct sm3_ce_ctx sctx;
|
|
+ /* Save the user key, padded to SM3_BLOCK_SIZE and XORed with the pad byte */
|
|
+ __u8 key[SM3_BLOCK_SIZE];
|
|
+};
|
|
+
|
|
+struct sm3_ce_drv_ctx {
|
|
+ struct wd_ctx_config_internal config;
|
|
+};
|
|
+
|
|
+void sm3_ce_block_compress(__u32 word_reg[SM3_STATE_WORDS],
|
|
+ const __u8 *src, size_t blocks);
|
|
+
|
|
+#ifdef __cplusplus
|
|
+}
|
|
+#endif
|
|
+
|
|
+#endif /* __ISA_CE_SM3_H */
|
|
diff --git a/drv/isa_ce_sm3_armv8.S b/drv/isa_ce_sm3_armv8.S
|
|
new file mode 100644
|
|
index 0000000..3d08e2d
|
|
--- /dev/null
|
|
+++ b/drv/isa_ce_sm3_armv8.S
|
|
@@ -0,0 +1,765 @@
|
|
+/* SPDX-License-Identifier: Apache-2.0 */
|
|
+/*
|
|
+ * Copyright 2011-2022 The OpenSSL Project Authors. All Rights Reserved.
|
|
+ *
|
|
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
+ * this file except in compliance with the License. You can obtain a copy
|
|
+ * in the file LICENSE in the source distribution or at
|
|
+ * https://www.openssl.org/source/license.html
|
|
+ */
|
|
+
|
|
+#include "../include/drv/arm_arch_ce.h"
|
|
+
|
|
+.arch armv8.2-a
|
|
+.text
|
|
+.globl sm3_ce_block_compress
|
|
+.type sm3_ce_block_compress,%function
|
|
+.align 5
|
|
+sm3_ce_block_compress:
|
|
+ AARCH64_VALID_CALL_TARGET
|
|
+/* Loads state */
|
|
+ /*
|
|
+ * Loads multiple single-element structures from memory(X0 register) and
|
|
+ * writes result to two SIMD&FP registers(v5.4s and v6.4s).
|
|
+ */
|
|
+ ld1 {v5.4s,v6.4s}, [x0] /* 4s -- 4 * 32bit */
|
|
+ /*
|
|
+ * Reverses the order of 32-bit(type:s) elements in each doubleword of the
|
|
+ * vector in the src SIMD&FP register(v5), places the result into a vector
|
|
+ * and writes the vector to the dst SIMD&FP register(v5).
|
|
+ */
|
|
+ rev64 v5.4s, v5.4s
|
|
+ rev64 v6.4s, v6.4s
|
|
+ /*
|
|
+ * Extracts the lowest vector elements from the second src SIMD&FP register,
|
|
+ * and highest vector elements from the first source SIMD&FP register,
|
|
+ * concatenates the result into a vector, and writes the vector to the
|
|
+ * dst SIMD&FP register vector. #8 means the numbered byte element to be extracted.
|
|
+ * Format: ext <dst register>, <first src register>, <second src register>, <index>
|
|
+ * #imm: immediate data.
|
|
+ */
|
|
+ ext v5.16b, v5.16b, v5.16b, #8 /* 16b -- 16 * 8bit */
|
|
+ ext v6.16b, v6.16b, v6.16b, #8
|
|
+ /* Form PC-relative address: adds an immediate value to the PC to form a PC-relative
|
|
+ * address, and writes the result to the dst register.
|
|
+ */
|
|
+ adr x8, .Tj /* 'Tj' is the constant defined in SM3 protocol */
|
|
+ /* Load pair of registers: calculates an address from a base register value
|
|
+ * and an immediate offset, loads two 32-bit words from memory, and writes
|
|
+ * them to two registers. */
|
|
+ ldp s16, s17, [x8] /* 'sn' is the scalar register, 'vn' is the vector register */
|
|
+
|
|
+.Loop:
|
|
+/* Loads input */
|
|
+ /*
|
|
+ * Loads multiple single-element structures to four registers.
|
|
+ * #64 is the immediate offset variant, it is the post-index immediate offset.
|
|
+ * Loads the input src data, msg to be hashed.
|
|
+ */
|
|
+ ld1 {v0.16b,v1.16b,v2.16b,v3.16b}, [x1], #64
|
|
+ /*
|
|
+ * Subtracts an optionally-shifted immediate value from a register value,
|
|
+ * and writes the result to the dst register.
|
|
+ */
|
|
+ sub w2, w2, #1
|
|
+
|
|
+ /* Copies the value in a src register to the dst register. */
|
|
+ mov v18.16b, v5.16b
|
|
+ mov v19.16b, v6.16b
|
|
+
|
|
+#ifndef __ARMEB__
|
|
+ rev32 v0.16b, v0.16b
|
|
+ rev32 v1.16b, v1.16b
|
|
+ rev32 v2.16b, v2.16b
|
|
+ rev32 v3.16b, v3.16b
|
|
+#endif
|
|
+
|
|
+ ext v20.16b, v16.16b, v16.16b, #4
|
|
+ /* s4 = w7 | w8 | w9 | w10 */
|
|
+ ext v4.16b, v1.16b, v2.16b, #12
|
|
+ /* vtmp1 = w3 | w4 | w5 | w6 */
|
|
+ ext v22.16b, v0.16b, v1.16b, #12
|
|
+ /* vtmp2 = w10 | w11 | w12 | w13 */
|
|
+ ext v23.16b, v2.16b, v3.16b, #8
|
|
+ /* sm3partw1 v4.4s, v0.4s, v3.4s */
|
|
+.inst 0xce63c004
|
|
+ /* sm3partw2 v4.4s, v23.4s, v22.4s */
|
|
+.inst 0xce76c6e4
|
|
+ eor v22.16b, v0.16b, v1.16b
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1a v5.4s, v23.4s, v22.4s[0] */
|
|
+.inst 0xce5682e5
|
|
+ /* sm3tt2a v6.4s, v23.4s, v0.4s[0] */
|
|
+.inst 0xce408ae6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1a v5.4s, v23.4s, v22.4s[1] */
|
|
+.inst 0xce5692e5
|
|
+ /* sm3tt2a v6.4s, v23.4s, v0.4s[1] */
|
|
+.inst 0xce409ae6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1a v5.4s, v23.4s, v22.4s[2] */
|
|
+.inst 0xce56a2e5
|
|
+ /* sm3tt2a v6.4s, v23.4s, v0.4s[2] */
|
|
+.inst 0xce40aae6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1a v5.4s, v23.4s, v22.4s[3] */
|
|
+.inst 0xce56b2e5
|
|
+ /* sm3tt2a v6.4s, v23.4s, v0.4s[3] */
|
|
+.inst 0xce40bae6
|
|
+ /* s4 = w7 | w8 | w9 | w10 */
|
|
+ ext v0.16b, v2.16b, v3.16b, #12
|
|
+ /* vtmp1 = w3 | w4 | w5 | w6 */
|
|
+ ext v22.16b, v1.16b, v2.16b, #12
|
|
+ /* vtmp2 = w10 | w11 | w12 | w13 */
|
|
+ ext v23.16b, v3.16b, v4.16b, #8
|
|
+ /* sm3partw1 v0.4s, v1.4s, v4.4s */
|
|
+.inst 0xce64c020
|
|
+ /* sm3partw2 v0.4s, v23.4s, v22.4s */
|
|
+.inst 0xce76c6e0
|
|
+ eor v22.16b, v1.16b, v2.16b
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1a v5.4s, v23.4s, v22.4s[0] */
|
|
+.inst 0xce5682e5
|
|
+ /* sm3tt2a v6.4s, v23.4s, v1.4s[0] */
|
|
+.inst 0xce418ae6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1a v5.4s, v23.4s, v22.4s[1] */
|
|
+.inst 0xce5692e5
|
|
+ /* sm3tt2a v6.4s, v23.4s, v1.4s[1] */
|
|
+.inst 0xce419ae6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1a v5.4s, v23.4s, v22.4s[2] */
|
|
+.inst 0xce56a2e5
|
|
+ /* sm3tt2a v6.4s, v23.4s, v1.4s[2] */
|
|
+.inst 0xce41aae6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1a v5.4s, v23.4s, v22.4s[3] */
|
|
+.inst 0xce56b2e5
|
|
+ /* sm3tt2a v6.4s, v23.4s, v1.4s[3] */
|
|
+.inst 0xce41bae6
|
|
+ /* s4 = w7 | w8 | w9 | w10 */
|
|
+ ext v1.16b, v3.16b, v4.16b, #12
|
|
+ /* vtmp1 = w3 | w4 | w5 | w6 */
|
|
+ ext v22.16b, v2.16b, v3.16b, #12
|
|
+ /* vtmp2 = w10 | w11 | w12 | w13 */
|
|
+ ext v23.16b, v4.16b, v0.16b, #8
|
|
+ /* sm3partw1 v1.4s, v2.4s, v0.4s */
|
|
+.inst 0xce60c041
|
|
+ /* sm3partw2 v1.4s, v23.4s, v22.4s */
|
|
+.inst 0xce76c6e1
|
|
+ eor v22.16b, v2.16b, v3.16b
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1a v5.4s, v23.4s, v22.4s[0] */
|
|
+.inst 0xce5682e5
|
|
+ /* sm3tt2a v6.4s, v23.4s, v2.4s[0] */
|
|
+.inst 0xce428ae6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1a v5.4s, v23.4s, v22.4s[1] */
|
|
+.inst 0xce5692e5
|
|
+ /* sm3tt2a v6.4s, v23.4s, v2.4s[1] */
|
|
+.inst 0xce429ae6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1a v5.4s, v23.4s, v22.4s[2] */
|
|
+.inst 0xce56a2e5
|
|
+ /* sm3tt2a v6.4s, v23.4s, v2.4s[2] */
|
|
+.inst 0xce42aae6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1a v5.4s, v23.4s, v22.4s[3] */
|
|
+.inst 0xce56b2e5
|
|
+ /* sm3tt2a v6.4s, v23.4s, v2.4s[3] */
|
|
+.inst 0xce42bae6
|
|
+ /* s4 = w7 | w8 | w9 | w10 */
|
|
+ ext v2.16b, v4.16b, v0.16b, #12
|
|
+ /* vtmp1 = w3 | w4 | w5 | w6 */
|
|
+ ext v22.16b, v3.16b, v4.16b, #12
|
|
+ /* vtmp2 = w10 | w11 | w12 | w13 */
|
|
+ ext v23.16b, v0.16b, v1.16b, #8
|
|
+ /* sm3partw1 v2.4s, v3.4s, v1.4s */
|
|
+.inst 0xce61c062
|
|
+ /* sm3partw2 v2.4s, v23.4s, v22.4s */
|
|
+.inst 0xce76c6e2
|
|
+ eor v22.16b, v3.16b, v4.16b
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1a v5.4s, v23.4s, v22.4s[0] */
|
|
+.inst 0xce5682e5
|
|
+ /* sm3tt2a v6.4s, v23.4s, v3.4s[0] */
|
|
+.inst 0xce438ae6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1a v5.4s, v23.4s, v22.4s[1] */
|
|
+.inst 0xce5692e5
|
|
+ /* sm3tt2a v6.4s, v23.4s, v3.4s[1] */
|
|
+.inst 0xce439ae6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1a v5.4s, v23.4s, v22.4s[2] */
|
|
+.inst 0xce56a2e5
|
|
+ /* sm3tt2a v6.4s, v23.4s, v3.4s[2] */
|
|
+.inst 0xce43aae6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1a v5.4s, v23.4s, v22.4s[3] */
|
|
+.inst 0xce56b2e5
|
|
+ /* sm3tt2a v6.4s, v23.4s, v3.4s[3] */
|
|
+.inst 0xce43bae6
|
|
+ ext v20.16b, v17.16b, v17.16b, #4
|
|
+ /* s4 = w7 | w8 | w9 | w10 */
|
|
+ ext v3.16b, v0.16b, v1.16b, #12
|
|
+ /* vtmp1 = w3 | w4 | w5 | w6 */
|
|
+ ext v22.16b, v4.16b, v0.16b, #12
|
|
+ /* vtmp2 = w10 | w11 | w12 | w13 */
|
|
+ ext v23.16b, v1.16b, v2.16b, #8
|
|
+ /* sm3partw1 v3.4s, v4.4s, v2.4s */
|
|
+.inst 0xce62c083
|
|
+ /* sm3partw2 v3.4s, v23.4s, v22.4s */
|
|
+.inst 0xce76c6e3
|
|
+ eor v22.16b, v4.16b, v0.16b
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[0] */
|
|
+.inst 0xce5686e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v4.4s[0] */
|
|
+.inst 0xce448ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */
|
|
+.inst 0xce5696e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v4.4s[1] */
|
|
+.inst 0xce449ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[2] */
|
|
+.inst 0xce56a6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v4.4s[2] */
|
|
+.inst 0xce44aee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[3] */
|
|
+.inst 0xce56b6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v4.4s[3] */
|
|
+.inst 0xce44bee6
|
|
+ /* s4 = w7 | w8 | w9 | w10 */
|
|
+ ext v4.16b, v1.16b, v2.16b, #12
|
|
+ /* vtmp1 = w3 | w4 | w5 | w6 */
|
|
+ ext v22.16b, v0.16b, v1.16b, #12
|
|
+ /* vtmp2 = w10 | w11 | w12 | w13 */
|
|
+ ext v23.16b, v2.16b, v3.16b, #8
|
|
+ /* sm3partw1 v4.4s, v0.4s, v3.4s */
|
|
+.inst 0xce63c004
|
|
+ /* sm3partw2 v4.4s, v23.4s, v22.4s */
|
|
+.inst 0xce76c6e4
|
|
+ eor v22.16b, v0.16b, v1.16b
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[0] */
|
|
+.inst 0xce5686e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v0.4s[0] */
|
|
+.inst 0xce408ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */
|
|
+.inst 0xce5696e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v0.4s[1] */
|
|
+.inst 0xce409ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[2] */
|
|
+.inst 0xce56a6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v0.4s[2] */
|
|
+.inst 0xce40aee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[3] */
|
|
+.inst 0xce56b6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v0.4s[3] */
|
|
+.inst 0xce40bee6
|
|
+ /* s4 = w7 | w8 | w9 | w10 */
|
|
+ ext v0.16b, v2.16b, v3.16b, #12
|
|
+ /* vtmp1 = w3 | w4 | w5 | w6 */
|
|
+ ext v22.16b, v1.16b, v2.16b, #12
|
|
+ /* vtmp2 = w10 | w11 | w12 | w13 */
|
|
+ ext v23.16b, v3.16b, v4.16b, #8
|
|
+ /* sm3partw1 v0.4s, v1.4s, v4.4s */
|
|
+.inst 0xce64c020
|
|
+ /* sm3partw2 v0.4s, v23.4s, v22.4s */
|
|
+.inst 0xce76c6e0
|
|
+ eor v22.16b, v1.16b, v2.16b
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[0] */
|
|
+.inst 0xce5686e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v1.4s[0] */
|
|
+.inst 0xce418ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */
|
|
+.inst 0xce5696e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v1.4s[1] */
|
|
+.inst 0xce419ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[2] */
|
|
+.inst 0xce56a6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v1.4s[2] */
|
|
+.inst 0xce41aee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[3] */
|
|
+.inst 0xce56b6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v1.4s[3] */
|
|
+.inst 0xce41bee6
|
|
+ /* s4 = w7 | w8 | w9 | w10 */
|
|
+ ext v1.16b, v3.16b, v4.16b, #12
|
|
+ /* vtmp1 = w3 | w4 | w5 | w6 */
|
|
+ ext v22.16b, v2.16b, v3.16b, #12
|
|
+ /* vtmp2 = w10 | w11 | w12 | w13 */
|
|
+ ext v23.16b, v4.16b, v0.16b, #8
|
|
+ /* sm3partw1 v1.4s, v2.4s, v0.4s */
|
|
+.inst 0xce60c041
|
|
+ /* sm3partw2 v1.4s, v23.4s, v22.4s */
|
|
+.inst 0xce76c6e1
|
|
+ eor v22.16b, v2.16b, v3.16b
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[0] */
|
|
+.inst 0xce5686e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v2.4s[0] */
|
|
+.inst 0xce428ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */
|
|
+.inst 0xce5696e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v2.4s[1] */
|
|
+.inst 0xce429ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[2] */
|
|
+.inst 0xce56a6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v2.4s[2] */
|
|
+.inst 0xce42aee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[3] */
|
|
+.inst 0xce56b6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v2.4s[3] */
|
|
+.inst 0xce42bee6
|
|
+ /* s4 = w7 | w8 | w9 | w10 */
|
|
+ ext v2.16b, v4.16b, v0.16b, #12
|
|
+ /* vtmp1 = w3 | w4 | w5 | w6 */
|
|
+ ext v22.16b, v3.16b, v4.16b, #12
|
|
+ /* vtmp2 = w10 | w11 | w12 | w13 */
|
|
+ ext v23.16b, v0.16b, v1.16b, #8
|
|
+ /* sm3partw1 v2.4s, v3.4s, v1.4s */
|
|
+.inst 0xce61c062
|
|
+ /* sm3partw2 v2.4s, v23.4s, v22.4s */
|
|
+.inst 0xce76c6e2
|
|
+ eor v22.16b, v3.16b, v4.16b
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[0] */
|
|
+.inst 0xce5686e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v3.4s[0] */
|
|
+.inst 0xce438ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */
|
|
+.inst 0xce5696e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v3.4s[1] */
|
|
+.inst 0xce439ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[2] */
|
|
+.inst 0xce56a6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v3.4s[2] */
|
|
+.inst 0xce43aee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[3] */
|
|
+.inst 0xce56b6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v3.4s[3] */
|
|
+.inst 0xce43bee6
|
|
+ /* s4 = w7 | w8 | w9 | w10 */
|
|
+ ext v3.16b, v0.16b, v1.16b, #12
|
|
+ /* vtmp1 = w3 | w4 | w5 | w6 */
|
|
+ ext v22.16b, v4.16b, v0.16b, #12
|
|
+ /* vtmp2 = w10 | w11 | w12 | w13 */
|
|
+ ext v23.16b, v1.16b, v2.16b, #8
|
|
+ /* sm3partw1 v3.4s, v4.4s, v2.4s */
|
|
+.inst 0xce62c083
|
|
+ /* sm3partw2 v3.4s, v23.4s, v22.4s */
|
|
+.inst 0xce76c6e3
|
|
+ eor v22.16b, v4.16b, v0.16b
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[0] */
|
|
+.inst 0xce5686e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v4.4s[0] */
|
|
+.inst 0xce448ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */
|
|
+.inst 0xce5696e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v4.4s[1] */
|
|
+.inst 0xce449ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[2] */
|
|
+.inst 0xce56a6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v4.4s[2] */
|
|
+.inst 0xce44aee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[3] */
|
|
+.inst 0xce56b6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v4.4s[3] */
|
|
+.inst 0xce44bee6
|
|
+ /* s4 = w7 | w8 | w9 | w10 */
|
|
+ ext v4.16b, v1.16b, v2.16b, #12
|
|
+ /* vtmp1 = w3 | w4 | w5 | w6 */
|
|
+ ext v22.16b, v0.16b, v1.16b, #12
|
|
+ /* vtmp2 = w10 | w11 | w12 | w13 */
|
|
+ ext v23.16b, v2.16b, v3.16b, #8
|
|
+ /* sm3partw1 v4.4s, v0.4s, v3.4s */
|
|
+.inst 0xce63c004
|
|
+ /* sm3partw2 v4.4s, v23.4s, v22.4s */
|
|
+.inst 0xce76c6e4
|
|
+ eor v22.16b, v0.16b, v1.16b
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[0] */
|
|
+.inst 0xce5686e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v0.4s[0] */
|
|
+.inst 0xce408ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */
|
|
+.inst 0xce5696e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v0.4s[1] */
|
|
+.inst 0xce409ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[2] */
|
|
+.inst 0xce56a6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v0.4s[2] */
|
|
+.inst 0xce40aee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[3] */
|
|
+.inst 0xce56b6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v0.4s[3] */
|
|
+.inst 0xce40bee6
|
|
+ /* s4 = w7 | w8 | w9 | w10 */
|
|
+ ext v0.16b, v2.16b, v3.16b, #12
|
|
+ /* vtmp1 = w3 | w4 | w5 | w6 */
|
|
+ ext v22.16b, v1.16b, v2.16b, #12
|
|
+ /* vtmp2 = w10 | w11 | w12 | w13 */
|
|
+ ext v23.16b, v3.16b, v4.16b, #8
|
|
+ /* sm3partw1 v0.4s, v1.4s, v4.4s */
|
|
+.inst 0xce64c020
|
|
+ /* sm3partw2 v0.4s, v23.4s, v22.4s */
|
|
+.inst 0xce76c6e0
|
|
+ eor v22.16b, v1.16b, v2.16b
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[0] */
|
|
+.inst 0xce5686e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v1.4s[0] */
|
|
+.inst 0xce418ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */
|
|
+.inst 0xce5696e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v1.4s[1] */
|
|
+.inst 0xce419ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[2] */
|
|
+.inst 0xce56a6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v1.4s[2] */
|
|
+.inst 0xce41aee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[3] */
|
|
+.inst 0xce56b6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v1.4s[3] */
|
|
+.inst 0xce41bee6
|
|
+ /* s4 = w7 | w8 | w9 | w10 */
|
|
+ ext v1.16b, v3.16b, v4.16b, #12
|
|
+ /* vtmp1 = w3 | w4 | w5 | w6 */
|
|
+ ext v22.16b, v2.16b, v3.16b, #12
|
|
+ /* vtmp2 = w10 | w11 | w12 | w13 */
|
|
+ ext v23.16b, v4.16b, v0.16b, #8
|
|
+ /* sm3partw1 v1.4s, v2.4s, v0.4s */
|
|
+.inst 0xce60c041
|
|
+ /* sm3partw2 v1.4s, v23.4s, v22.4s */
|
|
+.inst 0xce76c6e1
|
|
+ eor v22.16b, v2.16b, v3.16b
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[0] */
|
|
+.inst 0xce5686e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v2.4s[0] */
|
|
+.inst 0xce428ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */
|
|
+.inst 0xce5696e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v2.4s[1] */
|
|
+.inst 0xce429ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[2] */
|
|
+.inst 0xce56a6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v2.4s[2] */
|
|
+.inst 0xce42aee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[3] */
|
|
+.inst 0xce56b6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v2.4s[3] */
|
|
+.inst 0xce42bee6
|
|
+ eor v22.16b, v3.16b, v4.16b
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[0] */
|
|
+.inst 0xce5686e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v3.4s[0] */
|
|
+.inst 0xce438ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */
|
|
+.inst 0xce5696e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v3.4s[1] */
|
|
+.inst 0xce439ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[2] */
|
|
+.inst 0xce56a6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v3.4s[2] */
|
|
+.inst 0xce43aee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[3] */
|
|
+.inst 0xce56b6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v3.4s[3] */
|
|
+.inst 0xce43bee6
|
|
+ eor v22.16b, v4.16b, v0.16b
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[0] */
|
|
+.inst 0xce5686e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v4.4s[0] */
|
|
+.inst 0xce448ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */
|
|
+.inst 0xce5696e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v4.4s[1] */
|
|
+.inst 0xce449ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[2] */
|
|
+.inst 0xce56a6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v4.4s[2] */
|
|
+.inst 0xce44aee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[3] */
|
|
+.inst 0xce56b6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v4.4s[3] */
|
|
+.inst 0xce44bee6
|
|
+ eor v22.16b, v0.16b, v1.16b
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[0] */
|
|
+.inst 0xce5686e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v0.4s[0] */
|
|
+.inst 0xce408ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */
|
|
+.inst 0xce5696e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v0.4s[1] */
|
|
+.inst 0xce409ee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */
|
|
+.inst 0xce5418b7
|
|
+ shl v21.4s, v20.4s, #1
|
|
+ sri v21.4s, v20.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[2] */
|
|
+.inst 0xce56a6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v0.4s[2] */
|
|
+.inst 0xce40aee6
|
|
+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */
|
|
+.inst 0xce5518b7
|
|
+ shl v20.4s, v21.4s, #1
|
|
+ sri v20.4s, v21.4s, #31
|
|
+ /* sm3tt1b v5.4s, v23.4s, v22.4s[3] */
|
|
+.inst 0xce56b6e5
|
|
+ /* sm3tt2b v6.4s, v23.4s, v0.4s[3] */
|
|
+.inst 0xce40bee6
|
|
+ eor v5.16b, v5.16b, v18.16b
|
|
+ eor v6.16b, v6.16b, v19.16b
|
|
+ /*
|
|
+ * cbnz: compare and branch on Nonzero, compares the value in a register
|
|
+ * with zero, and conditionally branches to a label at a PC-relative offset
|
|
+ * if the comparison is not equal.
|
|
+ * 'w2' is the 32-bit name of the general-purpose register to be tested.
|
|
+ * '.Loop' is the program label to be conditionally branched to.
|
|
+ */
|
|
+ cbnz w2, .Loop
|
|
+
|
|
+ /* save state, it is the result of one cycle */
|
|
+ rev64 v5.4s, v5.4s
|
|
+ rev64 v6.4s, v6.4s
|
|
+ ext v5.16b, v5.16b, v5.16b, #8
|
|
+ ext v6.16b, v6.16b, v6.16b, #8
|
|
+ st1 {v5.4s,v6.4s}, [x0]
|
|
+ ret
|
|
+.size sm3_ce_block_compress,.-sm3_ce_block_compress
|
|
+
|
|
+.align 3
|
|
+.Tj:
|
|
+/*
|
|
+ * Inserts a list of 32-bit values as data into the assembly.
|
|
+ * In SM3 protocol:
|
|
+ * when 0 <= j <= 15, Tj = 0x79cc4519,
|
|
+ * when 16 <= j <= 63, Tj = 0x7a879d8a, stored below as Tj <<< 16 = 0x9d8a7a87.
|
|
+ */
|
|
+.word 0x79cc4519, 0x9d8a7a87
|
|
diff --git a/include/drv/arm_arch_ce.h b/include/drv/arm_arch_ce.h
|
|
new file mode 100644
|
|
index 0000000..3ea81a4
|
|
--- /dev/null
|
|
+++ b/include/drv/arm_arch_ce.h
|
|
@@ -0,0 +1,199 @@
|
|
+/* SPDX-License-Identifier: Apache-2.0 */
|
|
+/*
|
|
+ * Copyright 2011-2022 The OpenSSL Project Authors. All Rights Reserved.
|
|
+ *
|
|
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
+ * this file except in compliance with the License. You can obtain a copy
|
|
+ * in the file LICENSE in the source distribution or at
|
|
+ * https://www.openssl.org/source/license.html
|
|
+ */
|
|
+
|
|
+#ifndef __ARM_ARCH_CE_H
|
|
+#define __ARM_ARCH_CE_H
|
|
+
|
|
+#ifdef __cplusplus
|
|
+extern "C" {
|
|
+#endif
|
|
+
|
|
+#if !defined(__ARM_ARCH__)
|
|
+# if defined(__CC_ARM)
|
|
+# define __ARM_ARCH__ __TARGET_ARCH_ARM
|
|
+# if defined(__BIG_ENDIAN)
|
|
+# define __ARMEB__
|
|
+# else
|
|
+# define __ARMEL__
|
|
+# endif
|
|
+# elif defined(__GNUC__)
|
|
+# if defined(__aarch64__)
|
|
+# define __ARM_ARCH__ 8
|
|
+ /*
|
|
+ * GCC does not define __ARM_ARCH__, instead it defines
|
|
+ * bunch of below macros. See all_architectures[] table in
|
|
+ * gcc/config/arm/arm.c.
|
|
+ */
|
|
+# elif defined(__ARM_ARCH)
|
|
+# define __ARM_ARCH__ __ARM_ARCH
|
|
+# elif defined(__ARM_ARCH_8A__)
|
|
+# define __ARM_ARCH__ 8
|
|
+# elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \
|
|
+ defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \
|
|
+ defined(__ARM_ARCH_7EM__)
|
|
+# define __ARM_ARCH__ 7
|
|
+# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
|
|
+ defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6M__) || \
|
|
+ defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || \
|
|
+ defined(__ARM_ARCH_6T2__)
|
|
+# define __ARM_ARCH__ 6
|
|
+# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \
|
|
+ defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \
|
|
+ defined(__ARM_ARCH_5TEJ__)
|
|
+# define __ARM_ARCH__ 5
|
|
+# elif defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__)
|
|
+# define __ARM_ARCH__ 4
|
|
+# else
|
|
+# error "unsupported ARM architecture"
|
|
+# endif
|
|
+# endif
|
|
+#endif
|
|
+
|
|
+#if !defined(__ARM_MAX_ARCH__)
|
|
+# define __ARM_MAX_ARCH__ __ARM_ARCH__
|
|
+#endif
|
|
+
|
|
+#if __ARM_MAX_ARCH__ < __ARM_ARCH__
|
|
+# error "__ARM_MAX_ARCH__ can't be less than __ARM_ARCH__"
|
|
+#elif __ARM_MAX_ARCH__ != __ARM_ARCH__
|
|
+# if __ARM_ARCH__ < 7 && __ARM_MAX_ARCH__ >= 7 && defined(__ARMEB__)
|
|
+# error "can't build universal big-endian binary"
|
|
+# endif
|
|
+#endif
|
|
+
|
|
+#ifndef __ASSEMBLER__
|
|
+extern unsigned int ARMCAP_P;
|
|
+extern unsigned int ARM_MIDR;
|
|
+#endif
|
|
+
|
|
+#define ARMV7_NEON (1<<0)
|
|
+#define ARMV7_TICK (1<<1)
|
|
+#define ARMV8_AES (1<<2)
|
|
+#define ARMV8_SHA1 (1<<3)
|
|
+#define ARMV8_SHA256 (1<<4)
|
|
+#define ARMV8_PMULL (1<<5)
|
|
+#define ARMV8_SHA512 (1<<6)
|
|
+#define ARMV8_CPUID (1<<7)
|
|
+#define ARMV8_RNG (1<<8)
|
|
+#define ARMV8_SM3 (1<<9)
|
|
+#define ARMV8_SM4 (1<<10)
|
|
+#define ARMV8_SHA3 (1<<11)
|
|
+#define ARMV8_UNROLL8_EOR3 (1<<12)
|
|
+#define ARMV8_SVE (1<<13)
|
|
+#define ARMV8_SVE2 (1<<14)
|
|
+
|
|
+/*
|
|
+ * MIDR_EL1 system register
|
|
+ *
|
|
+ * 63___ _ ___32_31___ _ ___24_23_____20_19_____16_15__ _ __4_3_______0
|
|
+ * | | | | | | |
|
|
+ * |RES0 | Implementer | Variant | Arch | PartNum |Revision|
|
|
+ * |____ _ _____|_____ _ _____|_________|_______ _|____ _ ___|________|
|
|
+ *
|
|
+ */
|
|
+
|
|
+#define ARM_CPU_IMP_ARM 0x41
|
|
+#define HISI_CPU_IMP 0x48
|
|
+
|
|
+#define ARM_CPU_PART_CORTEX_A72 0xD08
|
|
+#define ARM_CPU_PART_N1 0xD0C
|
|
+#define ARM_CPU_PART_V1 0xD40
|
|
+#define ARM_CPU_PART_N2 0xD49
|
|
+#define HISI_CPU_PART_KP920 0xD01
|
|
+
|
|
+#define MIDR_PARTNUM_SHIFT 4
|
|
+#define MIDR_PARTNUM_MASK (0xfffU << MIDR_PARTNUM_SHIFT)
|
|
+#define MIDR_PARTNUM(midr) \
|
|
+ (((midr) & MIDR_PARTNUM_MASK) >> MIDR_PARTNUM_SHIFT)
|
|
+
|
|
+#define MIDR_IMPLEMENTER_SHIFT 24
|
|
+#define MIDR_IMPLEMENTER_MASK (0xffU << MIDR_IMPLEMENTER_SHIFT)
|
|
+#define MIDR_IMPLEMENTER(midr) \
|
|
+ (((midr) & MIDR_IMPLEMENTER_MASK) >> MIDR_IMPLEMENTER_SHIFT)
|
|
+
|
|
+#define MIDR_ARCHITECTURE_SHIFT 16
|
|
+#define MIDR_ARCHITECTURE_MASK (0xfU << MIDR_ARCHITECTURE_SHIFT)
|
|
+#define MIDR_ARCHITECTURE(midr) \
|
|
+ (((midr) & MIDR_ARCHITECTURE_MASK) >> MIDR_ARCHITECTURE_SHIFT)
|
|
+
|
|
+#define MIDR_CPU_MODEL_MASK \
|
|
+ (MIDR_IMPLEMENTER_MASK | \
|
|
+ MIDR_PARTNUM_MASK | \
|
|
+ MIDR_ARCHITECTURE_MASK)
|
|
+
|
|
+#define MIDR_CPU_MODEL(imp, partnum) \
|
|
+ (((imp) << MIDR_IMPLEMENTER_SHIFT) | \
|
|
+ (0xfU << MIDR_ARCHITECTURE_SHIFT) | \
|
|
+ ((partnum) << MIDR_PARTNUM_SHIFT))
|
|
+
|
|
+#define MIDR_IS_CPU_MODEL(midr, imp, partnum) \
|
|
+ (((midr) & MIDR_CPU_MODEL_MASK) == MIDR_CPU_MODEL(imp, partnum))
|
|
+
|
|
+#if defined(__ASSEMBLER__)
|
|
+ /*
|
|
+ * Support macros for
|
|
+ * - Armv8.3-A Pointer Authentication and
|
|
+ * - Armv8.5-A Branch Target Identification
|
|
+ * features which require emitting a .note.gnu.property section with the
|
|
+ * appropriate architecture-dependent feature bits set.
|
|
+	 * Read more: "ELF for the Arm® 64-bit Architecture"
|
|
+ */
|
|
+# if defined(__ARM_FEATURE_BTI_DEFAULT) && __ARM_FEATURE_BTI_DEFAULT == 1
|
|
+# define GNU_PROPERTY_AARCH64_BTI (1 << 0) /* Has Branch Target Identification */
|
|
+# define AARCH64_VALID_CALL_TARGET hint #34 /* BTI 'c' */
|
|
+# else
|
|
+# define GNU_PROPERTY_AARCH64_BTI 0 /* No Branch Target Identification */
|
|
+# define AARCH64_VALID_CALL_TARGET
|
|
+# endif
|
|
+
|
|
+# if defined(__ARM_FEATURE_PAC_DEFAULT) && \
|
|
+ (__ARM_FEATURE_PAC_DEFAULT & 1) == 1 /* Signed with A-key */
|
|
+# define GNU_PROPERTY_AARCH64_POINTER_AUTH (1 << 1) /* Has Pointer Authentication */
|
|
+# define AARCH64_SIGN_LINK_REGISTER hint #25 /* PACIASP */
|
|
+# define AARCH64_VALIDATE_LINK_REGISTER hint #29 /* AUTIASP */
|
|
+# elif defined(__ARM_FEATURE_PAC_DEFAULT) && \
|
|
+ (__ARM_FEATURE_PAC_DEFAULT & 2) == 2 /* Signed with B-key */
|
|
+# define GNU_PROPERTY_AARCH64_POINTER_AUTH (1 << 1) /* Has Pointer Authentication */
|
|
+# define AARCH64_SIGN_LINK_REGISTER hint #27 /* PACIBSP */
|
|
+# define AARCH64_VALIDATE_LINK_REGISTER hint #31 /* AUTIBSP */
|
|
+# else
|
|
+# define GNU_PROPERTY_AARCH64_POINTER_AUTH 0 /* No Pointer Authentication */
|
|
+# if GNU_PROPERTY_AARCH64_BTI != 0
|
|
+# define AARCH64_SIGN_LINK_REGISTER AARCH64_VALID_CALL_TARGET
|
|
+# else
|
|
+# define AARCH64_SIGN_LINK_REGISTER
|
|
+# endif
|
|
+# define AARCH64_VALIDATE_LINK_REGISTER
|
|
+# endif
|
|
+
|
|
+# if GNU_PROPERTY_AARCH64_POINTER_AUTH != 0 || GNU_PROPERTY_AARCH64_BTI != 0
|
|
+ .pushsection .note.gnu.property, "a";
|
|
+ .balign 8;
|
|
+ .long 4;
|
|
+ .long 0x10;
|
|
+ .long 0x5;
|
|
+ .asciz "GNU";
|
|
+ .long 0xc0000000; /* GNU_PROPERTY_AARCH64_FEATURE_1_AND */
|
|
+ .long 4;
|
|
+ .long (GNU_PROPERTY_AARCH64_POINTER_AUTH | GNU_PROPERTY_AARCH64_BTI);
|
|
+ .long 0;
|
|
+ .popsection;
|
|
+# endif
|
|
+
|
|
+#endif /* defined __ASSEMBLER__ */
|
|
+
|
|
+#define IS_CPU_SUPPORT_UNROLL8_EOR3() \
|
|
+ (ARMCAP_P & ARMV8_UNROLL8_EOR3)
|
|
+
|
|
+#ifdef __cplusplus
|
|
+}
|
|
+#endif
|
|
+
|
|
+#endif /* __ARM_ARCH_CE_H */
|
|
diff --git a/include/wd_alg.h b/include/wd_alg.h
|
|
index f8b136e..861b7d9 100644
|
|
--- a/include/wd_alg.h
|
|
+++ b/include/wd_alg.h
|
|
@@ -19,6 +19,49 @@ extern "C" {
|
|
#define ALG_NAME_SIZE 128
|
|
#define DEV_NAME_LEN 128
|
|
|
|
+/*
|
|
+ * Macros related to arm platform:
|
|
+ * ARM puts the feature bits for Crypto Extensions in AT_HWCAP2, whereas
|
|
+ * AArch64 uses AT_HWCAP.
|
|
+ */
|
|
+#ifndef AT_HWCAP
|
|
+# define AT_HWCAP 16
|
|
+#endif
|
|
+
|
|
+#ifndef AT_HWCAP2
|
|
+# define AT_HWCAP2 26
|
|
+#endif
|
|
+
|
|
+#if defined(__arm__) || defined(__arm)
|
|
+# define HWCAP AT_HWCAP
|
|
+# define HWCAP_NEON (1 << 12)
|
|
+
|
|
+# define HWCAP_CE AT_HWCAP2
|
|
+# define HWCAP_CE_AES (1 << 0)
|
|
+# define HWCAP_CE_PMULL (1 << 1)
|
|
+# define HWCAP_CE_SHA1 (1 << 2)
|
|
+# define HWCAP_CE_SHA256 (1 << 3)
|
|
+#elif defined(__aarch64__)
|
|
+# define HWCAP AT_HWCAP
|
|
+# define HWCAP_NEON (1 << 1)
|
|
+
|
|
+# define HWCAP_CE HWCAP
|
|
+# define HWCAP_CE_AES (1 << 3)
|
|
+# define HWCAP_CE_PMULL (1 << 4)
|
|
+# define HWCAP_CE_SHA1 (1 << 5)
|
|
+# define HWCAP_CE_SHA256 (1 << 6)
|
|
+# define HWCAP_CPUID (1 << 11)
|
|
+# define HWCAP_SHA3 (1 << 17)
|
|
+# define HWCAP_CE_SM3 (1 << 18)
|
|
+# define HWCAP_CE_SM4 (1 << 19)
|
|
+# define HWCAP_CE_SHA512 (1 << 21)
|
|
+# define HWCAP_SVE (1 << 22)
|
|
+/* AT_HWCAP2 */
|
|
+# define HWCAP2 26
|
|
+# define HWCAP2_SVE2 (1 << 1)
|
|
+# define HWCAP2_RNG (1 << 16)
|
|
+#endif
|
|
+
|
|
enum alg_dev_type {
|
|
UADK_ALG_SOFT = 0x0,
|
|
UADK_ALG_CE_INSTR = 0x1,
|
|
diff --git a/wd_alg.c b/wd_alg.c
|
|
index 3b111c8..f34a407 100644
|
|
--- a/wd_alg.c
|
|
+++ b/wd_alg.c
|
|
@@ -9,6 +9,7 @@
|
|
#include <stdbool.h>
|
|
#include <stdlib.h>
|
|
#include <pthread.h>
|
|
+#include <sys/auxv.h>
|
|
|
|
#include "wd.h"
|
|
#include "wd_alg.h"
|
|
@@ -90,6 +91,49 @@ static bool wd_check_accel_dev(const char *dev_name)
|
|
 	return false;
|
|
 }
|
|
|
|
+/*
|
|
+ * wd_check_ce_support() - check that the CPU implements the CE instructions
|
|
+ * required by the driver named @dev_name.
|
|
+ * @dev_name: driver name; "isa_ce_sm3" and "isa_ce_sm4" are recognised.
|
|
+ *
|
|
+ * The SM3/SM4 capability bits are only defined for AArch64 in wd_alg.h, so on
|
|
+ * other architectures the masks stay 0 and the function reports false instead
|
|
+ * of failing to compile.
|
|
+ *
|
|
+ * Return: true if the matching hwcap bit is set, false otherwise (including
|
|
+ * a NULL or unknown @dev_name).
|
|
+ */
|
|
+static bool wd_check_ce_support(const char *dev_name)
|
|
+{
|
|
+	unsigned long hwcaps = 0;
|
|
+	unsigned long sm3_cap = 0;
|
|
+	unsigned long sm4_cap = 0;
|
|
+
|
|
+	if (!dev_name)
|
|
+		return false;
|
|
+
|
|
+	#if defined(__arm__) || defined(__arm)
|
|
+		hwcaps = getauxval(AT_HWCAP2);
|
|
+	#elif defined(__aarch64__)
|
|
+		hwcaps = getauxval(AT_HWCAP);
|
|
+	#endif
|
|
+
|
|
+	#ifdef HWCAP_CE_SM3
|
|
+		sm3_cap = HWCAP_CE_SM3;
|
|
+	#endif
|
|
+	#ifdef HWCAP_CE_SM4
|
|
+		sm4_cap = HWCAP_CE_SM4;
|
|
+	#endif
|
|
+
|
|
+	if (!strcmp("isa_ce_sm3", dev_name) && (hwcaps & sm3_cap))
|
|
+		return true;
|
|
+
|
|
+	if (!strcmp("isa_ce_sm4", dev_name) && (hwcaps & sm4_cap))
|
|
+		return true;
|
|
+
|
|
+	return false;
|
|
+}
|
|
+
|
|
static bool wd_alg_check_available(int calc_type, const char *dev_name)
|
|
{
|
|
bool ret = false;
|
|
@@ -99,6 +118,7 @@ static bool wd_alg_check_available(int calc_type, const char *dev_name)
|
|
break;
|
|
/* Should find the CPU if not support CE */
|
|
case UADK_ALG_CE_INSTR:
|
|
+ ret = wd_check_ce_support(dev_name);
|
|
break;
|
|
/* Should find the CPU if not support SVE */
|
|
case UADK_ALG_SVE_INSTR:
|
|
@@ -280,8 +300,13 @@ struct wd_alg_driver *wd_request_drv(const char *alg_name, bool hw_mask)
|
|
struct wd_alg_driver *drv = NULL;
|
|
int tmp_priority = -1;
|
|
|
|
- if (!pnext || !alg_name) {
|
|
- WD_ERR("invalid: request alg param is error!\n");
|
|
+ if (!pnext) {
|
|
+ WD_ERR("invalid: requset drv pnext is NULL!\n");
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ if (!alg_name) {
|
|
+ WD_ERR("invalid: alg_name is NULL!\n");
|
|
return NULL;
|
|
}
|
|
|
|
@@ -289,7 +314,8 @@ struct wd_alg_driver *wd_request_drv(const char *alg_name, bool hw_mask)
|
|
pthread_mutex_lock(&mutex);
|
|
while (pnext) {
|
|
/* hw_mask true mean not to used hardware dev */
|
|
- if (hw_mask && pnext->drv->calc_type == UADK_ALG_HW) {
|
|
+ if ((hw_mask && pnext->drv->calc_type == UADK_ALG_HW) ||
|
|
+ (!hw_mask && pnext->drv->calc_type != UADK_ALG_HW)) {
|
|
pnext = pnext->next;
|
|
continue;
|
|
}
|
|
diff --git a/wd_digest.c b/wd_digest.c
|
|
index c59184d..491502a 100644
|
|
--- a/wd_digest.c
|
|
+++ b/wd_digest.c
|
|
@@ -222,7 +222,7 @@ static void wd_digest_clear_status(void)
|
|
}
|
|
|
|
static int wd_digest_init_nolock(struct wd_ctx_config *config,
|
|
- struct wd_sched *sched)
|
|
+ struct wd_sched *sched)
|
|
{
|
|
int ret;
|
|
|
|
diff --git a/wd_sched.c b/wd_sched.c
|
|
index 419280e..b43834d 100644
|
|
--- a/wd_sched.c
|
|
+++ b/wd_sched.c
|
|
@@ -453,7 +453,7 @@ static struct wd_sched sched_table[SCHED_POLICY_BUTT] = {
|
|
.poll_policy = session_sched_poll_policy,
|
|
}, {
|
|
.name = "None scheduler",
|
|
- .sched_policy = SCHED_POLICY_SINGLE,
|
|
+ .sched_policy = SCHED_POLICY_NONE,
|
|
.sched_init = sched_none_init,
|
|
.pick_next_ctx = sched_none_pick_next_ctx,
|
|
.poll_policy = sched_none_poll_policy,
|
|
diff --git a/wd_util.c b/wd_util.c
|
|
index 6134239..39909ca 100644
|
|
--- a/wd_util.c
|
|
+++ b/wd_util.c
|
|
@@ -91,6 +91,11 @@ struct acc_alg_item {
|
|
char *algtype;
|
|
};
|
|
|
|
+struct wd_ce_ctx {
|
|
+ char *drv_name;
|
|
+ void *priv;
|
|
+};
|
|
+
|
|
static struct acc_alg_item alg_options[] = {
|
|
{"zlib", "zlib"},
|
|
{"gzip", "gzip"},
|
|
@@ -229,7 +234,6 @@ int wd_init_ctx_config(struct wd_ctx_config_internal *in,
|
|
ret = -WD_EINVAL;
|
|
goto err_out;
|
|
}
|
|
-
|
|
clone_ctx_to_internal(cfg->ctxs + i, ctxs + i);
|
|
ret = pthread_spin_init(&ctxs[i].lock, PTHREAD_PROCESS_SHARED);
|
|
if (ret) {
|
|
@@ -2612,14 +2616,69 @@ out_freelist:
|
|
 	return ret;
|
|
 }
|
|
|
|
+/*
|
|
+ * wd_alg_ce_ctx_init() - allocate the single ctx used by CE/soft drivers.
|
|
+ * @attrs: init attributes; attrs->ctx_config must point to a valid config.
|
|
+ *
|
|
+ * Sets ctx_num to 1, allocates the ctx array and stores a zeroed
|
|
+ * struct wd_ce_ctx as the handle of ctxs[0].
|
|
+ *
|
|
+ * Return: WD_SUCCESS, or -WD_ENOMEM if an allocation fails. On failure
|
|
+ * everything allocated here is freed and ctx_num/ctxs are reset, because the
|
|
+ * caller's error path does not run wd_alg_ce_ctx_uninit().
|
|
+ */
|
|
+static int wd_alg_ce_ctx_init(struct wd_init_attrs *attrs)
|
|
+{
|
|
+	struct wd_ctx_config *ctx_config = attrs->ctx_config;
|
|
+	struct wd_ce_ctx *ce_ctx;
|
|
+
|
|
+	ctx_config->ctx_num = 1;
|
|
+	ctx_config->ctxs = calloc(ctx_config->ctx_num, sizeof(struct wd_ctx));
|
|
+	if (!ctx_config->ctxs) {
|
|
+		WD_ERR("failed to alloc ctxs!\n");
|
|
+		goto err_reset;
|
|
+	}
|
|
+
|
|
+	ce_ctx = calloc(1, sizeof(*ce_ctx));
|
|
+	if (!ce_ctx) {
|
|
+		WD_ERR("failed to alloc ce ctx!\n");
|
|
+		goto err_free_ctxs;
|
|
+	}
|
|
+	ctx_config->ctxs[0].ctx = (handle_t)ce_ctx;
|
|
+
|
|
+	return WD_SUCCESS;
|
|
+
|
|
+err_free_ctxs:
|
|
+	free(ctx_config->ctxs);
|
|
+	ctx_config->ctxs = NULL;
|
|
+err_reset:
|
|
+	ctx_config->ctx_num = 0;
|
|
+	return -WD_ENOMEM;
|
|
+}
|
|
+
|
|
+/* Free each CE ctx handle and clear its slot, then free the ctx array. */
|
|
+static void wd_alg_ce_ctx_uninit(struct wd_ctx_config *ctx_config)
|
|
+{
|
|
+	__u32 idx = 0;
|
|
+
|
|
+	while (idx < ctx_config->ctx_num) {
|
|
+		struct wd_ctx *slot = &ctx_config->ctxs[idx++];
|
|
+
|
|
+		free((struct wd_ce_ctx *)slot->ctx);
|
|
+		slot->ctx = 0;
|
|
+	}
|
|
+	free(ctx_config->ctxs);
|
|
+}
|
|
+
|
|
static void wd_alg_ctx_uninit(struct wd_ctx_config *ctx_config)
|
|
{
|
|
__u32 i;
|
|
|
|
- for (i = 0; i < ctx_config->ctx_num; i++)
|
|
+ for (i = 0; i < ctx_config->ctx_num; i++) {
|
|
if (ctx_config->ctxs[i].ctx) {
|
|
wd_release_ctx(ctx_config->ctxs[i].ctx);
|
|
ctx_config->ctxs[i].ctx = 0;
|
|
+ }
|
|
}
|
|
|
|
free(ctx_config->ctxs);
|
|
@@ -2633,9 +2667,9 @@ int wd_alg_attrs_init(struct wd_init_attrs *attrs)
|
|
struct wd_ctx_config *ctx_config = NULL;
|
|
struct wd_sched *alg_sched = NULL;
|
|
char alg_type[CRYPTO_MAX_ALG_NAME];
|
|
- char *alg = attrs->alg;
|
|
int driver_type = UADK_ALG_HW;
|
|
- int ret;
|
|
+ char *alg = attrs->alg;
|
|
+ int ret = 0;
|
|
|
|
if (!attrs->ctx_params)
|
|
return -WD_EINVAL;
|
|
@@ -2646,22 +2680,37 @@ int wd_alg_attrs_init(struct wd_init_attrs *attrs)
|
|
switch (driver_type) {
|
|
case UADK_ALG_SOFT:
|
|
case UADK_ALG_CE_INSTR:
|
|
- /* No need to alloc resource */
|
|
- if (sched_type != SCHED_POLICY_NONE)
|
|
+ /* No need to alloc resource */
|
|
+ if (sched_type != SCHED_POLICY_NONE) {
|
|
+ WD_ERR("invalid sched_type\n");
|
|
return -WD_EINVAL;
|
|
+ }
|
|
+
|
|
+ ctx_config = calloc(1, sizeof(*ctx_config));
|
|
+ if (!ctx_config) {
|
|
+ WD_ERR("fail to alloc ctx config\n");
|
|
+ return -WD_ENOMEM;
|
|
+ }
|
|
+ attrs->ctx_config = ctx_config;
|
|
|
|
alg_sched = wd_sched_rr_alloc(SCHED_POLICY_NONE, 1, 1, alg_poll_func);
|
|
if (!alg_sched) {
|
|
WD_ERR("fail to alloc scheduler\n");
|
|
- return -WD_EINVAL;
|
|
+ goto out_ctx_config;
|
|
}
|
|
+
|
|
attrs->sched = alg_sched;
|
|
|
|
- ret = wd_sched_rr_instance(alg_sched, NULL);
|
|
+ ret = wd_alg_ce_ctx_init(attrs);
|
|
if (ret) {
|
|
- WD_ERR("fail to instance scheduler\n");
|
|
+ WD_ERR("fail to init ce ctx\n");
|
|
goto out_freesched;
|
|
}
|
|
+
|
|
+ ret = alg_init_func(ctx_config, alg_sched);
|
|
+ if (ret)
|
|
+ goto out_pre_init;
|
|
+
|
|
break;
|
|
case UADK_ALG_SVE_INSTR:
|
|
/* Todo lock cpu core */
|
|
@@ -2720,7 +2769,10 @@ int wd_alg_attrs_init(struct wd_init_attrs *attrs)
|
|
return 0;
|
|
|
|
out_pre_init:
|
|
- wd_alg_ctx_uninit(ctx_config);
|
|
+ if (driver_type == UADK_ALG_CE_INSTR || driver_type == UADK_ALG_SOFT)
|
|
+ wd_alg_ce_ctx_uninit(ctx_config);
|
|
+ else
|
|
+ wd_alg_ctx_uninit(ctx_config);
|
|
out_freesched:
|
|
wd_sched_rr_release(alg_sched);
|
|
out_ctx_config:
|
|
@@ -2733,10 +2785,19 @@ void wd_alg_attrs_uninit(struct wd_init_attrs *attrs)
|
|
{
|
|
struct wd_ctx_config *ctx_config = attrs->ctx_config;
|
|
struct wd_sched *alg_sched = attrs->sched;
|
|
+ int driver_type = attrs->driver->calc_type;
|
|
|
|
- if (ctx_config) {
|
|
- wd_alg_ctx_uninit(ctx_config);
|
|
- free(ctx_config);
|
|
+ if (driver_type == UADK_ALG_CE_INSTR || driver_type == UADK_ALG_SOFT) {
|
|
+ if (ctx_config) {
|
|
+ wd_alg_ce_ctx_uninit(ctx_config);
|
|
+ free(ctx_config);
|
|
+ }
|
|
+ } else {
|
|
+ if (ctx_config) {
|
|
+ wd_alg_ctx_uninit(ctx_config);
|
|
+ free(ctx_config);
|
|
+ }
|
|
}
|
|
+
|
|
wd_sched_rr_release(alg_sched);
|
|
}
|
|
--
|
|
2.25.1
|
|
|