Replace backport solution to fix race condition in cuse session

This commit is contained in:
Zht-Try 2023-09-05 11:20:26 +08:00
parent bf89d0e913
commit 7eaeef2e44
3 changed files with 187 additions and 126 deletions

View File

@ -1,124 +0,0 @@
From 34555d211c58ac7615d41547f56756ae02d22957 Mon Sep 17 00:00:00 2001
From: suweifeng <suweifeng1@huawei.com>
Date: Tue, 8 Jun 2021 22:11:53 +0800
Subject: [PATCH 26/27] Fix race condition in continuous setup and teardown
cuse session
If we continuously set up and tear down a cuse session, it will tear down
an uninitialized cuse session and cause a segmentation fault, so wait until
the session has been created.
Signed-off-by: suweifeng <suweifeng1@huawei.com>
---
lib/nvme/nvme_cuse.c | 41 +++++++++++++++++++++++++++++++++++++----
1 file changed, 37 insertions(+), 4 deletions(-)
diff --git a/lib/nvme/nvme_cuse.c b/lib/nvme/nvme_cuse.c
index 3eccfd0..8f0be31 100644
--- a/lib/nvme/nvme_cuse.c
+++ b/lib/nvme/nvme_cuse.c
@@ -55,6 +55,8 @@ struct cuse_device {
pthread_t tid;
struct fuse_session *session;
+ pthread_cond_t session_cond; /* session condition variable */
+ pthread_mutex_t session_mtx; /* session mutex variable */
struct cuse_device *ctrlr_device;
struct cuse_device *ns_devices; /**< Array of cuse ns devices */
@@ -666,11 +668,17 @@ cuse_thread(void *arg)
cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ctrlr_clop,
&multithreaded, cuse_device);
}
+
if (!cuse_device->session) {
SPDK_ERRLOG("Cannot create cuse session\n");
+ pthread_mutex_lock(&cuse_device->session_mtx);
+ pthread_cond_signal(&cuse_device->session_cond);
+ pthread_mutex_unlock(&cuse_device->session_mtx);
goto err;
}
-
+ pthread_mutex_lock(&cuse_device->session_mtx);
+ pthread_cond_signal(&cuse_device->session_cond);
+ pthread_mutex_unlock(&cuse_device->session_mtx);
SPDK_NOTICELOG("fuse session for device %s created\n", cuse_device->dev_name);
/* Receive and process fuse requests */
@@ -718,13 +726,20 @@ cuse_nvme_ns_start(struct cuse_device *ctrlr_device, uint32_t nsid)
free(ns_device);
return -ENAMETOOLONG;
}
-
+ pthread_cond_init(&ns_device->session_cond, NULL);
+ pthread_mutex_init(&ns_device->session_mtx, NULL);
rv = pthread_create(&ns_device->tid, NULL, cuse_thread, ns_device);
if (rv != 0) {
SPDK_ERRLOG("pthread_create failed\n");
return -rv;
}
-
+ pthread_mutex_lock(&ns_device->session_mtx);
+ pthread_cond_wait(&ns_device->session_cond, &ns_device->session_mtx);
+ pthread_mutex_unlock(&ns_device->session_mtx);
+ if (!ns_device->session) {
+ SPDK_ERRLOG("create namespace session failed\n");
+ return -1;
+ }
ns_device->is_started = true;
return 0;
@@ -739,9 +754,10 @@ cuse_nvme_ns_stop(struct cuse_device *ctrlr_device, uint32_t nsid)
if (!ns_device->is_started) {
return;
}
-
fuse_session_exit(ns_device->session);
pthread_join(ns_device->tid, NULL);
+ pthread_cond_destroy(&ns_device->session_cond);
+ pthread_mutex_destroy(&ns_device->session_mtx);
ns_device->is_started = false;
}
@@ -817,8 +833,14 @@ cuse_nvme_ctrlr_stop(struct cuse_device *ctrlr_device)
cuse_nvme_ns_stop(ctrlr_device, i);
}
+ if (!ctrlr_device->is_started) {
+ return;
+ }
fuse_session_exit(ctrlr_device->session);
pthread_join(ctrlr_device->tid, NULL);
+ pthread_cond_destroy(&ctrlr_device->session_cond);
+ pthread_mutex_destroy(&ctrlr_device->session_mtx);
+ ctrlr_device->is_started = false;
TAILQ_REMOVE(&g_ctrlr_ctx_head, ctrlr_device, tailq);
spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index);
if (spdk_bit_array_count_set(g_ctrlr_started) == 0) {
@@ -894,12 +916,23 @@ nvme_cuse_start(struct spdk_nvme_ctrlr *ctrlr)
snprintf(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name), "spdk/nvme%d",
ctrlr_device->index);
+ pthread_cond_init(&ctrlr_device->session_cond, NULL);
+ pthread_mutex_init(&ctrlr_device->session_mtx, NULL);
rv = pthread_create(&ctrlr_device->tid, NULL, cuse_thread, ctrlr_device);
if (rv != 0) {
SPDK_ERRLOG("pthread_create failed\n");
rv = -rv;
goto err3;
}
+ pthread_mutex_lock(&ctrlr_device->session_mtx);
+ pthread_cond_wait(&ctrlr_device->session_cond, &ctrlr_device->session_mtx);
+ pthread_mutex_unlock(&ctrlr_device->session_mtx);
+ if (!ctrlr_device->session) {
+ SPDK_ERRLOG("cuse session create failed\n");
+ rv = -1;
+ goto err3;
+ }
+ ctrlr_device->is_started = true;
TAILQ_INSERT_TAIL(&g_ctrlr_ctx_head, ctrlr_device, tailq);
ctrlr_device->ns_devices = (struct cuse_device *)calloc(num_ns, sizeof(struct cuse_device));
--
2.33.0

View File

@ -0,0 +1,182 @@
From d651f8a2385cc40232a9837fc3cf75014700da3e Mon Sep 17 00:00:00 2001
From: Weifeng Su <suweifeng1@huawei.com>
Date: Thu, 10 Jun 2021 10:29:51 +0800
Subject: [PATCH] nvme/nvme_cuse: Fix race condition in cuse session
Conflict:NA
Reference:https://github.com/spdk/spdk/commit/d651f8a2385cc40232a9837fc3cf75014700da3e
If we continuously set up and tear down a cuse session, it will tear down
an uninitialized cuse session and cause a segmentation fault. The new function
cuse_session_create performs the session creation under g_cuse_mtx
to avoid this issue.
Signed-off-by: Weifeng Su <suweifeng1@huawei.com>
Change-Id: I2b32e81c0990ede00eea6d4ed3a7e44d534d4df3
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/8231
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ziye Yang <ziye.yang@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
---
lib/nvme/nvme_cuse.c | 61 ++++++++++++++++++++++++++++----------------
1 file changed, 39 insertions(+), 22 deletions(-)
diff --git a/lib/nvme/nvme_cuse.c b/lib/nvme/nvme_cuse.c
index 2a38ba4d2..9c3198ee5 100644
--- a/lib/nvme/nvme_cuse.c
+++ b/lib/nvme/nvme_cuse.c
@@ -734,22 +734,14 @@ static const struct cuse_lowlevel_ops cuse_ns_clop = {
.ioctl = cuse_ns_ioctl,
};
-static void *
-cuse_thread(void *arg)
+static int cuse_session_create(struct cuse_device *cuse_device)
{
- struct cuse_device *cuse_device = arg;
char *cuse_argv[] = { "cuse", "-f" };
+ int multithreaded;
int cuse_argc = SPDK_COUNTOF(cuse_argv);
+ struct cuse_info ci;
char devname_arg[128 + 8];
const char *dev_info_argv[] = { devname_arg };
- struct cuse_info ci;
- int multithreaded;
- int rc;
- struct fuse_buf buf = { .mem = NULL };
- struct pollfd fds;
- int timeout_msecs = 500;
-
- spdk_unaffinitize_thread();
snprintf(devname_arg, sizeof(devname_arg), "DEVNAME=%s", cuse_device->dev_name);
@@ -765,12 +757,25 @@ cuse_thread(void *arg)
cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ctrlr_clop,
&multithreaded, cuse_device);
}
+
if (!cuse_device->session) {
SPDK_ERRLOG("Cannot create cuse session\n");
- goto err;
+ return -1;
}
-
SPDK_NOTICELOG("fuse session for device %s created\n", cuse_device->dev_name);
+ return 0;
+}
+
+static void *
+cuse_thread(void *arg)
+{
+ struct cuse_device *cuse_device = arg;
+ int rc;
+ struct fuse_buf buf = { .mem = NULL };
+ struct pollfd fds;
+ int timeout_msecs = 500;
+
+ spdk_unaffinitize_thread();
/* Receive and process fuse requests */
fds.fd = fuse_session_fd(cuse_device->session);
@@ -788,7 +793,6 @@ cuse_thread(void *arg)
free(buf.mem);
fuse_session_reset(cuse_device->session);
cuse_lowlevel_teardown(cuse_device->session);
-err:
pthread_exit(NULL);
}
@@ -817,13 +821,15 @@ cuse_nvme_ns_start(struct cuse_device *ctrlr_device, uint32_t nsid)
free(ns_device);
return -ENAMETOOLONG;
}
-
+ rv = cuse_session_create(ns_device);
+ if (rv != 0) {
+ return rv;
+ }
rv = pthread_create(&ns_device->tid, NULL, cuse_thread, ns_device);
if (rv != 0) {
SPDK_ERRLOG("pthread_create failed\n");
return -rv;
}
-
ns_device->is_started = true;
return 0;
@@ -916,8 +922,12 @@ cuse_nvme_ctrlr_stop(struct cuse_device *ctrlr_device)
cuse_nvme_ns_stop(ctrlr_device, i);
}
+ if (!ctrlr_device->is_started) {
+ return;
+ }
fuse_session_exit(ctrlr_device->session);
pthread_join(ctrlr_device->tid, NULL);
+ ctrlr_device->is_started = false;
TAILQ_REMOVE(&g_ctrlr_ctx_head, ctrlr_device, tailq);
spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index);
if (spdk_bit_array_count_set(g_ctrlr_started) == 0) {
@@ -970,7 +980,7 @@ nvme_cuse_start(struct spdk_nvme_ctrlr *ctrlr)
if (!ctrlr_device) {
SPDK_ERRLOG("Cannot allocate memory for ctrlr_device.");
rv = -ENOMEM;
- goto err2;
+ goto free_device;
}
ctrlr_device->ctrlr = ctrlr;
@@ -981,7 +991,7 @@ nvme_cuse_start(struct spdk_nvme_ctrlr *ctrlr)
ctrlr_device->index = spdk_bit_array_find_first_clear(g_ctrlr_started, ctrlr_device->index);
if (ctrlr_device->index == UINT32_MAX) {
SPDK_ERRLOG("Too many registered controllers\n");
- goto err2;
+ goto free_device;
}
if (nvme_cuse_claim(ctrlr_device, ctrlr_device->index) == 0) {
@@ -993,12 +1003,19 @@ nvme_cuse_start(struct spdk_nvme_ctrlr *ctrlr)
snprintf(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name), "spdk/nvme%d",
ctrlr_device->index);
+ rv = cuse_session_create(ctrlr_device);
+ if (rv != 0) {
+ goto clear_and_free;
+ }
+
rv = pthread_create(&ctrlr_device->tid, NULL, cuse_thread, ctrlr_device);
if (rv != 0) {
SPDK_ERRLOG("pthread_create failed\n");
rv = -rv;
- goto err3;
+ goto clear_and_free;
}
+ ctrlr_device->is_started = true;
+
TAILQ_INSERT_TAIL(&g_ctrlr_ctx_head, ctrlr_device, tailq);
ctrlr_device->ns_devices = (struct cuse_device *)calloc(num_ns, sizeof(struct cuse_device));
@@ -1007,14 +1024,14 @@ nvme_cuse_start(struct spdk_nvme_ctrlr *ctrlr)
SPDK_ERRLOG("Cannot start CUSE namespace devices.");
cuse_nvme_ctrlr_stop(ctrlr_device);
rv = -1;
- goto err3;
+ goto clear_and_free;
}
return 0;
-err3:
+clear_and_free:
spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index);
-err2:
+free_device:
free(ctrlr_device);
if (spdk_bit_array_count_set(g_ctrlr_started) == 0) {
spdk_bit_array_free(&g_ctrlr_started);
--
2.27.0

View File

@ -4,7 +4,7 @@
Name: spdk
Version: 21.01.1
Release: 16
Release: 17
Summary: Set of libraries and utilities for high performance user-mode storage
License: BSD and MIT
URL: http://spdk.io
@ -34,7 +34,7 @@ Patch22: 0022-use-spdk_nvme_ns_cmd_dataset_management-and-delete-s.patch
Patch23: 0023-spdk-add-nvme-support-for-HSAK.patch
Patch24: 0024-Add-CUSE-switch-for-nvme-ctrlr.patch
Patch25: 0025-Adapt-for-ES3000-serial-vendor-special-opcode-in-CUS.patch
Patch26: 0026-Fix-race-condition-in-continuous-setup-and-teardown-.patch
Patch26: 0026-nvme-nvme_cuse-Fix-race-condition-in-cuse-session.patch
Patch27: 0027-Change-log-level-in-poll-timeout.patch
Patch28: 0028-configure-add-CONFIG_HAVE_ARC4RANDOM.patch
Patch29: 0029-Enable-unittest-in-make-check.patch
@ -228,6 +228,9 @@ mv doc/output/html/ %{install_docdir}
%changelog
* Tue Sep 5 2023 Hongtao Zhang <zhanghongtao22@huawei.com> - 21.01.1-17
- Replace backport solution to fix race condition in cuse session
* Fri Sep 1 2023 Xue Liu <liuxue@loongson.cn> - 21.01.1-16
- Add support for LOONGARCH.