qemu/nbd-allow-reconnect-on-open-with-corresponding-new-o.patch
Zhang Bo 6e9beed308 backport nbd related patches to avoid vm crash during migration
block-nbd was refactored during release 6.2.0, but we didn't include
all the needed patches in the 6.2.0 baseline, which leads to a
vm crash during migration.
The reason is as follows:
when an iothread is configured, coroutines should return to
the exact iothread they left. But in the 6.2.0 baseline, because
patches were missing, the nbd-related coroutine didn't have its own
aio_context. It in fact ended up in the main aio_context, and this wrong
context leads to the vm crash.
2022-08-29 17:09:05 +08:00

139 lines
4.7 KiB
Diff

From eb42fba27842e3ebc342f15847863b5e812a7919 Mon Sep 17 00:00:00 2001
From: Zhang Bo <oscar.zhangbo@huawei.com>
Date: Mon, 29 Aug 2022 15:28:55 +0800
Subject: [PATCH 1/5] nbd: allow reconnect on open, with corresponding new
options
This is useful when the start of the vm and the start of the nbd server
are not simple to synchronize.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Zhang Bo <oscar.zhangbo@huawei.com>
---
block/nbd.c | 45 +++++++++++++++++++++++++++++++++++++++++++-
qapi/block-core.json | 9 ++++++++-
2 files changed, 52 insertions(+), 2 deletions(-)
diff --git a/block/nbd.c b/block/nbd.c
index 5ef462db1b..63dbfa807d 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -80,6 +80,7 @@ typedef struct BDRVNBDState {
NBDClientState state;
QEMUTimer *reconnect_delay_timer;
+ QEMUTimer *open_timer;
NBDClientRequest requests[MAX_NBD_REQUESTS];
NBDReply reply;
@@ -87,6 +88,7 @@ typedef struct BDRVNBDState {
/* Connection parameters */
uint32_t reconnect_delay;
+ uint32_t open_timeout;
SocketAddress *saddr;
char *export, *tlscredsid;
QCryptoTLSCreds *tlscreds;
@@ -218,6 +220,32 @@ static void nbd_teardown_connection(BlockDriverState *bs)
s->state = NBD_CLIENT_QUIT;
}
+static void open_timer_del(BDRVNBDState *s)
+{
+ if (s->open_timer) {
+ timer_free(s->open_timer);
+ s->open_timer = NULL;
+ }
+}
+
+static void open_timer_cb(void *opaque)
+{
+ BDRVNBDState *s = opaque;
+
+ nbd_co_establish_connection_cancel(s->conn);
+ open_timer_del(s);
+}
+
+static void open_timer_init(BDRVNBDState *s, uint64_t expire_time_ns)
+{
+ assert(!s->open_timer);
+ s->open_timer = aio_timer_new(bdrv_get_aio_context(s->bs),
+ QEMU_CLOCK_REALTIME,
+ SCALE_NS,
+ open_timer_cb, s);
+ timer_mod(s->open_timer, expire_time_ns);
+}
+
static bool nbd_client_connecting(BDRVNBDState *s)
{
NBDClientState state = qatomic_load_acquire(&s->state);
@@ -1742,6 +1770,15 @@ static QemuOptsList nbd_runtime_opts = {
"future requests before a successful reconnect will "
"immediately fail. Default 0",
},
+ {
+ .name = "open-timeout",
+ .type = QEMU_OPT_NUMBER,
+ .help = "In seconds. If zero, the nbd driver tries the connection "
+ "only once, and fails to open if the connection fails. "
+ "If non-zero, the nbd driver will repeat connection "
+ "attempts until successful or until @open-timeout seconds "
+ "have elapsed. Default 0",
+ },
{ /* end of list */ }
},
};
@@ -1797,6 +1834,7 @@ static int nbd_process_options(BlockDriverState *bs, QDict *options,
}
s->reconnect_delay = qemu_opt_get_number(opts, "reconnect-delay", 0);
+ s->open_timeout = qemu_opt_get_number(opts, "open-timeout", 0);
ret = 0;
@@ -1828,7 +1866,12 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
s->conn = nbd_client_connection_new(s->saddr, true, s->export,
s->x_dirty_bitmap, s->tlscreds);
- /* TODO: Configurable retry-until-timeout behaviour. */
+ if (s->open_timeout) {
+ nbd_client_connection_enable_retry(s->conn);
+ open_timer_init(s, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
+ s->open_timeout * NANOSECONDS_PER_SECOND);
+ }
+
s->state = NBD_CLIENT_CONNECTING_WAIT;
ret = nbd_do_establish_connection(bs, errp);
if (ret < 0) {
diff --git a/qapi/block-core.json b/qapi/block-core.json
index e65fabe36d..618e417135 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -4096,6 +4096,12 @@
# future requests before a successful reconnect will
# immediately fail. Default 0 (Since 4.2)
#
+# @open-timeout: In seconds. If zero, the nbd driver tries the connection
+# only once, and fails to open if the connection fails.
+# If non-zero, the nbd driver will repeat connection attempts
+# until successful or until @open-timeout seconds have elapsed.
+# Default 0 (Since 7.0)
+#
# Features:
# @unstable: Member @x-dirty-bitmap is experimental.
#
@@ -4106,7 +4112,8 @@
'*export': 'str',
'*tls-creds': 'str',
'*x-dirty-bitmap': { 'type': 'str', 'features': [ 'unstable' ] },
- '*reconnect-delay': 'uint32' } }
+ '*reconnect-delay': 'uint32',
+ '*open-timeout': 'uint32' } }
##
# @BlockdevOptionsRaw:
--
2.27.0