block-nbd was refactored during the 6.2.0 release, but not all of the needed patches were included in the 6.2.0 baseline, which leads to a VM crash during migration. The reason is as follows: when an iothread is configured, coroutines should return to the exact iothread they left. But because patches were missing from the 6.2.0 baseline, the nbd-related coroutine did not have its related aio_context. It in fact ends up in the mainline aio_context, and this mistaken context leads to the VM crash.
139 lines
4.7 KiB
Diff
139 lines
4.7 KiB
Diff
From eb42fba27842e3ebc342f15847863b5e812a7919 Mon Sep 17 00:00:00 2001
|
|
From: Zhang Bo <oscar.zhangbo@huawei.com>
|
|
Date: Mon, 29 Aug 2022 15:28:55 +0800
|
|
Subject: [PATCH 1/5] nbd: allow reconnect on open, with corresponding new
|
|
options
|
|
|
|
It is useful when start of vm and start of nbd server are not
|
|
simple to sync.
|
|
|
|
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
|
|
Reviewed-by: Eric Blake <eblake@redhat.com>
|
|
Signed-off-by: Zhang Bo <oscar.zhangbo@huawei.com>
|
|
---
|
|
block/nbd.c | 45 +++++++++++++++++++++++++++++++++++++++++++-
|
|
qapi/block-core.json | 9 ++++++++-
|
|
2 files changed, 52 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/block/nbd.c b/block/nbd.c
|
|
index 5ef462db1b..63dbfa807d 100644
|
|
--- a/block/nbd.c
|
|
+++ b/block/nbd.c
|
|
@@ -80,6 +80,7 @@ typedef struct BDRVNBDState {
|
|
NBDClientState state;
|
|
|
|
QEMUTimer *reconnect_delay_timer;
|
|
+ QEMUTimer *open_timer;
|
|
|
|
NBDClientRequest requests[MAX_NBD_REQUESTS];
|
|
NBDReply reply;
|
|
@@ -87,6 +88,7 @@ typedef struct BDRVNBDState {
|
|
|
|
/* Connection parameters */
|
|
uint32_t reconnect_delay;
|
|
+ uint32_t open_timeout;
|
|
SocketAddress *saddr;
|
|
char *export, *tlscredsid;
|
|
QCryptoTLSCreds *tlscreds;
|
|
@@ -218,6 +220,32 @@ static void nbd_teardown_connection(BlockDriverState *bs)
|
|
s->state = NBD_CLIENT_QUIT;
|
|
}
|
|
|
|
+static void open_timer_del(BDRVNBDState *s)
|
|
+{
|
|
+ if (s->open_timer) {
|
|
+ timer_free(s->open_timer);
|
|
+ s->open_timer = NULL;
|
|
+ }
|
|
+}
|
|
+
|
|
+static void open_timer_cb(void *opaque)
|
|
+{
|
|
+ BDRVNBDState *s = opaque;
|
|
+
|
|
+ nbd_co_establish_connection_cancel(s->conn);
|
|
+ open_timer_del(s);
|
|
+}
|
|
+
|
|
+static void open_timer_init(BDRVNBDState *s, uint64_t expire_time_ns)
|
|
+{
|
|
+ assert(!s->open_timer);
|
|
+ s->open_timer = aio_timer_new(bdrv_get_aio_context(s->bs),
|
|
+ QEMU_CLOCK_REALTIME,
|
|
+ SCALE_NS,
|
|
+ open_timer_cb, s);
|
|
+ timer_mod(s->open_timer, expire_time_ns);
|
|
+}
|
|
+
|
|
static bool nbd_client_connecting(BDRVNBDState *s)
|
|
{
|
|
NBDClientState state = qatomic_load_acquire(&s->state);
|
|
@@ -1742,6 +1770,15 @@ static QemuOptsList nbd_runtime_opts = {
|
|
"future requests before a successful reconnect will "
|
|
"immediately fail. Default 0",
|
|
},
|
|
+ {
|
|
+ .name = "open-timeout",
|
|
+ .type = QEMU_OPT_NUMBER,
|
|
+ .help = "In seconds. If zero, the nbd driver tries the connection "
|
|
+ "only once, and fails to open if the connection fails. "
|
|
+ "If non-zero, the nbd driver will repeat connection "
|
|
+ "attempts until successful or until @open-timeout seconds "
|
|
+ "have elapsed. Default 0",
|
|
+ },
|
|
{ /* end of list */ }
|
|
},
|
|
};
|
|
@@ -1797,6 +1834,7 @@ static int nbd_process_options(BlockDriverState *bs, QDict *options,
|
|
}
|
|
|
|
s->reconnect_delay = qemu_opt_get_number(opts, "reconnect-delay", 0);
|
|
+ s->open_timeout = qemu_opt_get_number(opts, "open-timeout", 0);
|
|
|
|
ret = 0;
|
|
|
|
@@ -1828,7 +1866,12 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
|
|
s->conn = nbd_client_connection_new(s->saddr, true, s->export,
|
|
s->x_dirty_bitmap, s->tlscreds);
|
|
|
|
- /* TODO: Configurable retry-until-timeout behaviour. */
|
|
+ if (s->open_timeout) {
|
|
+ nbd_client_connection_enable_retry(s->conn);
|
|
+ open_timer_init(s, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
|
|
+ s->open_timeout * NANOSECONDS_PER_SECOND);
|
|
+ }
|
|
+
|
|
s->state = NBD_CLIENT_CONNECTING_WAIT;
|
|
ret = nbd_do_establish_connection(bs, errp);
|
|
if (ret < 0) {
|
|
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
|
index e65fabe36d..618e417135 100644
|
|
--- a/qapi/block-core.json
|
|
+++ b/qapi/block-core.json
|
|
@@ -4096,6 +4096,12 @@
|
|
# future requests before a successful reconnect will
|
|
# immediately fail. Default 0 (Since 4.2)
|
|
#
|
|
+# @open-timeout: In seconds. If zero, the nbd driver tries the connection
|
|
+# only once, and fails to open if the connection fails.
|
|
+# If non-zero, the nbd driver will repeat connection attempts
|
|
+# until successful or until @open-timeout seconds have elapsed.
|
|
+# Default 0 (Since 7.0)
|
|
+#
|
|
# Features:
|
|
# @unstable: Member @x-dirty-bitmap is experimental.
|
|
#
|
|
@@ -4106,7 +4112,8 @@
|
|
'*export': 'str',
|
|
'*tls-creds': 'str',
|
|
'*x-dirty-bitmap': { 'type': 'str', 'features': [ 'unstable' ] },
|
|
- '*reconnect-delay': 'uint32' } }
|
|
+ '*reconnect-delay': 'uint32',
|
|
+ '*open-timeout': 'uint32' } }
|
|
|
|
##
|
|
# @BlockdevOptionsRaw:
|
|
--
|
|
2.27.0
|
|
|