!29 async-fix-qemu-hang
Merge pull request !29 from FangYing/async-fix-qemu-hang
This commit is contained in: commit df3f209a2a
120 aio-wait-delegate-polling-of-main-AioContext-if-BQL-not-held.patch Normal file
@ -0,0 +1,120 @@
From a2bae876b7f694b12073bac8ad6668e4d975ad88 Mon Sep 17 00:00:00 2001
From: Ying Fang <fangying1@huawei.com>
Date: Fri, 10 Apr 2020 16:08:19 +0000
Subject: [PATCH 1/2] aio-wait: delegate polling of main AioContext if BQL not
 held

Any thread that is not a iothread returns NULL for qemu_get_current_aio_context().
As a result, it would also return true for
in_aio_context_home_thread(qemu_get_aio_context()), causing
AIO_WAIT_WHILE to invoke aio_poll() directly. This is incorrect
if the BQL is not held, because aio_poll() does not expect to
run concurrently from multiple threads, and it can actually
happen when savevm writes to the vmstate file from the
migration thread.

Therefore, restrict in_aio_context_home_thread to return true
for the main AioContext only if the BQL is held.

The function is moved to aio-wait.h because it is mostly used
there and to avoid a circular reference between main-loop.h
and block/aio.h.

upstream_url: https://patchwork.kernel.org/patch/11482099/

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20200407140746.8041-5-pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/block/aio-wait.h | 22 ++++++++++++++++++++++
 include/block/aio.h      | 29 ++++++++++-------------------
 2 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
index afd0ff7e..d349e7e3 100644
--- a/include/block/aio-wait.h
+++ b/include/block/aio-wait.h
@@ -26,6 +26,7 @@
 #define QEMU_AIO_WAIT_H

 #include "block/aio.h"
+#include "qemu/main-loop.h"

 /**
  * AioWait:
@@ -124,4 +125,25 @@ void aio_wait_kick(void);
  */
 void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque);

+/**
+ * in_aio_context_home_thread:
+ * @ctx: the aio context
+ *
+ * Return whether we are running in the thread that normally runs @ctx. Note
+ * that acquiring/releasing ctx does not affect the outcome, each AioContext
+ * still only has one home thread that is responsible for running it.
+ */
+static inline bool in_aio_context_home_thread(AioContext *ctx)
+{
+    if (ctx == qemu_get_current_aio_context()) {
+        return true;
+    }
+
+    if (ctx == qemu_get_aio_context()) {
+        return qemu_mutex_iothread_locked();
+    } else {
+        return false;
+    }
+}
+
 #endif /* QEMU_AIO_WAIT */
diff --git a/include/block/aio.h b/include/block/aio.h
index 0ca25dfe..c527893b 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -61,12 +61,16 @@ struct AioContext {
     QLIST_HEAD(, AioHandler) aio_handlers;

     /* Used to avoid unnecessary event_notifier_set calls in aio_notify;
-     * accessed with atomic primitives. If this field is 0, everything
-     * (file descriptors, bottom halves, timers) will be re-evaluated
-     * before the next blocking poll(), thus the event_notifier_set call
-     * can be skipped. If it is non-zero, you may need to wake up a
-     * concurrent aio_poll or the glib main event loop, making
-     * event_notifier_set necessary.
+     * only written from the AioContext home thread, or under the BQL in
+     * the case of the main AioContext. However, it is read from any
+     * thread so it is still accessed with atomic primitives.
+     *
+     * If this field is 0, everything (file descriptors, bottom halves,
+     * timers) will be re-evaluated before the next blocking poll() or
+     * io_uring wait; therefore, the event_notifier_set call can be
+     * skipped. If it is non-zero, you may need to wake up a concurrent
+     * aio_poll or the glib main event loop, making event_notifier_set
+     * necessary.
      *
      * Bit 0 is reserved for GSource usage of the AioContext, and is 1
      * between a call to aio_ctx_prepare and the next call to aio_ctx_check.
@@ -581,19 +585,6 @@ void aio_co_enter(AioContext *ctx, struct Coroutine *co);
  */
 AioContext *qemu_get_current_aio_context(void);

-/**
- * in_aio_context_home_thread:
- * @ctx: the aio context
- *
- * Return whether we are running in the thread that normally runs @ctx. Note
- * that acquiring/releasing ctx does not affect the outcome, each AioContext
- * still only has one home thread that is responsible for running it.
- */
-static inline bool in_aio_context_home_thread(AioContext *ctx)
-{
-    return ctx == qemu_get_current_aio_context();
-}
-
 /**
  * aio_context_setup:
  * @ctx: the aio context
--
2.25.2
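The reasoning in the commit message above can be restated as a small guard. The sketch below is illustrative only: it is not part of the patch and not QEMU source, the helper name poll_or_wait() is hypothetical, and only in_aio_context_home_thread() and aio_poll() come from the declarations quoted in the diff, so the fragment would build only inside a QEMU source tree.

    #include "block/aio-wait.h"    /* in_aio_context_home_thread(), aio_poll() */

    /* Hypothetical helper, for illustration: poll @ctx directly only when this
     * thread is @ctx's home thread, or when @ctx is the main AioContext and
     * the BQL is held -- the two cases the new check accepts. */
    static void poll_or_wait(AioContext *ctx)
    {
        if (in_aio_context_home_thread(ctx)) {
            /* Sole poller of @ctx: aio_poll() here cannot race with another
             * thread polling the same context. */
            aio_poll(ctx, true);
        } else {
            /* E.g. the migration thread during savevm, which does not hold the
             * BQL: after this patch the check above returns false, so the
             * caller must wait for the home thread to make progress instead of
             * calling aio_poll() concurrently with the main loop. */
        }
    }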
171 async-use-explicit-memory-barriers.patch Normal file
@ -0,0 +1,171 @@
From 787af8ed2bc86dc8688727d62a251965d9c42e00 Mon Sep 17 00:00:00 2001
From: Ying Fang <fangying1@huawei.com>
Date: Fri, 10 Apr 2020 16:19:50 +0000
Subject: [PATCH 2/2] async: use explicit memory barriers

When using C11 atomics, non-seqcst reads and writes do not participate
in the total order of seqcst operations. In util/async.c and util/aio-posix.c,
in particular, the pattern that we use

          write ctx->notify_me                 write bh->scheduled
          read bh->scheduled                   read ctx->notify_me
          if !bh->scheduled, sleep             if ctx->notify_me, notify

needs to use seqcst operations for both the write and the read. In
general this is something that we do not want, because there can be
many sources that are polled in addition to bottom halves. The
alternative is to place a seqcst memory barrier between the write
and the read. This also comes with a disadvantage, in that the
memory barrier is implicit on strongly-ordered architectures and
it wastes a few dozen clock cycles.

Fortunately, ctx->notify_me is never written concurrently by two
threads, so we can assert that and relax the writes to ctx->notify_me.
The resulting solution works and performs well on both aarch64 and x86.

Note that the atomic_set/atomic_read combination is not an atomic
read-modify-write, and therefore it is even weaker than C11 ATOMIC_RELAXED;
on x86, ATOMIC_RELAXED compiles to a locked operation.

upstream_url: https://patchwork.kernel.org/patch/11482103/

Analyzed-by: Ying Fang <fangying1@huawei.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Tested-by: Ying Fang <fangying1@huawei.com>
Message-Id: <20200407140746.8041-6-pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 util/aio-posix.c | 16 ++++++++++++++--
 util/aio-win32.c | 17 ++++++++++++++---
 util/async.c     | 16 ++++++++++++----
 3 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/util/aio-posix.c b/util/aio-posix.c
index 6fbfa792..ca58b9a4 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -613,6 +613,11 @@ bool aio_poll(AioContext *ctx, bool blocking)
     int64_t timeout;
     int64_t start = 0;

+    /*
+     * There cannot be two concurrent aio_poll calls for the same AioContext (or
+     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
+     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
+     */
     assert(in_aio_context_home_thread(ctx));

     /* aio_notify can avoid the expensive event_notifier_set if
@@ -623,7 +628,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
      * so disable the optimization now.
      */
     if (blocking) {
-        atomic_add(&ctx->notify_me, 2);
+        atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
+        /*
+         * Write ctx->notify_me before computing the timeout
+         * (reading bottom half flags, etc.). Pairs with
+         * smp_mb in aio_notify().
+         */
+        smp_mb();
     }

     qemu_lockcnt_inc(&ctx->list_lock);
@@ -668,7 +679,8 @@ bool aio_poll(AioContext *ctx, bool blocking)
     }

     if (blocking) {
-        atomic_sub(&ctx->notify_me, 2);
+        /* Finish the poll before clearing the flag. */
+        atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
         aio_notify_accept(ctx);
     }

diff --git a/util/aio-win32.c b/util/aio-win32.c
index a23b9c36..729d533f 100644
--- a/util/aio-win32.c
+++ b/util/aio-win32.c
@@ -321,6 +321,12 @@ bool aio_poll(AioContext *ctx, bool blocking)
     int count;
     int timeout;

+    /*
+     * There cannot be two concurrent aio_poll calls for the same AioContext (or
+     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
+     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
+     */
+    assert(in_aio_context_home_thread(ctx));
     progress = false;

     /* aio_notify can avoid the expensive event_notifier_set if
@@ -331,7 +337,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
      * so disable the optimization now.
      */
     if (blocking) {
-        atomic_add(&ctx->notify_me, 2);
+        atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
+        /*
+         * Write ctx->notify_me before computing the timeout
+         * (reading bottom half flags, etc.). Pairs with
+         * smp_mb in aio_notify().
+         */
+        smp_mb();
     }

     qemu_lockcnt_inc(&ctx->list_lock);
@@ -364,8 +376,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
         ret = WaitForMultipleObjects(count, events, FALSE, timeout);
         if (blocking) {
             assert(first);
-            assert(in_aio_context_home_thread(ctx));
-            atomic_sub(&ctx->notify_me, 2);
+            atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
             aio_notify_accept(ctx);
         }

diff --git a/util/async.c b/util/async.c
index afc17fb3..12b33204 100644
--- a/util/async.c
+++ b/util/async.c
@@ -221,7 +221,14 @@ aio_ctx_prepare(GSource *source, gint *timeout)
 {
     AioContext *ctx = (AioContext *) source;

-    atomic_or(&ctx->notify_me, 1);
+    atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) | 1);
+
+    /*
+     * Write ctx->notify_me before computing the timeout
+     * (reading bottom half flags, etc.). Pairs with
+     * smp_mb in aio_notify().
+     */
+    smp_mb();

     /* We assume there is no timeout already supplied */
     *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
@@ -239,7 +246,8 @@ aio_ctx_check(GSource *source)
     AioContext *ctx = (AioContext *) source;
     QEMUBH *bh;

-    atomic_and(&ctx->notify_me, ~1);
+    /* Finish computing the timeout before clearing the flag. */
+    atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) & ~1);
     aio_notify_accept(ctx);

     for (bh = ctx->first_bh; bh; bh = bh->next) {
@@ -344,10 +352,10 @@ LinuxAioState *aio_get_linux_aio(AioContext *ctx)
 void aio_notify(AioContext *ctx)
 {
     /* Write e.g. bh->scheduled before reading ctx->notify_me. Pairs
-     * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll.
+     * with smp_mb in aio_ctx_prepare or aio_poll.
      */
     smp_mb();
-    if (ctx->notify_me) {
+    if (atomic_read(&ctx->notify_me)) {
         event_notifier_set(&ctx->notifier);
         atomic_mb_set(&ctx->notified, true);
     }
--
2.25.2
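The store/fence/load pairing described in the commit message above can be modelled outside QEMU. The sketch below is a self-contained C11 illustration, not QEMU code: atomic_thread_fence(memory_order_seq_cst) stands in for smp_mb(), and the two flags mirror ctx->notify_me and bh->scheduled. With both fences in place, at least one side is guaranteed to observe the other side's write, which is the property the relaxed stores plus explicit smp_mb() preserve while avoiding seqcst accesses on every operation.

    /* Build with, e.g.: cc -std=c11 -o notify_demo notify_demo.c  (file name hypothetical) */
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_int notify_me;   /* plays the role of ctx->notify_me */
    static atomic_int scheduled;   /* plays the role of bh->scheduled  */

    /* Poller side, like aio_poll()/aio_ctx_prepare() after the patch:
     * plain store, full fence, then read the other side's flag. */
    static bool poller_would_sleep(void)
    {
        atomic_store_explicit(&notify_me,
                              atomic_load_explicit(&notify_me, memory_order_relaxed) + 2,
                              memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);     /* smp_mb() */
        return atomic_load_explicit(&scheduled, memory_order_relaxed) == 0;
    }

    /* Notifier side, like aio_bh_enqueue() followed by aio_notify():
     * set the flag, full fence, then decide whether a wakeup is needed. */
    static bool notifier_must_wake(void)
    {
        atomic_store_explicit(&scheduled, 1, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);     /* smp_mb() */
        return atomic_load_explicit(&notify_me, memory_order_relaxed) != 0;
    }

    int main(void)
    {
        /* Run from two threads, the two fences forbid the bad outcome where the
         * poller decides to sleep while the notifier decides not to wake it;
         * dropping either fence reintroduces the aarch64 hang this series fixes. */
        printf("would sleep: %d, must wake: %d\n",
               poller_would_sleep(), notifier_must_wake());
        return 0;
    }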
@ -61,7 +61,8 @@ Patch0048: COLO-compare-Fix-incorrect-if-logic.patch
 Patch0049: qcow2-bitmap-Fix-uint64_t-left-shift-overflow.patch
 Patch0050: pcie-Add-pcie-root-port-fast-plug-unplug-feature.patch
 Patch0051: pcie-Compat-with-devices-which-do-not-support-Link-W.patch
-Patch0052: util-async-Add-memory-barrier-to-aio_ctx_prepare.patch
+Patch0052: aio-wait-delegate-polling-of-main-AioContext-if-BQL-not-held.patch
+Patch0053: async-use-explicit-memory-barriers.patch

 BuildRequires: flex
 BuildRequires: bison
@ -397,8 +398,9 @@ getent passwd qemu >/dev/null || \
 %endif

 %changelog
-* Thu Apr 2 2020 Huawei Technologies Co., Ltd. <fangying1@huawei.com>
-- util/async: Add memory barrier to aio_ctx_prepare
+* Sat Apr 11 2020 Huawei Technologies Co., Ltd. <fangying1@huawei.com>
+- aio-wait: delegate polling of main AioContext if BQL not held
+- async: use explicit memory barriers

 * Wed Mar 18 2020 Huawei Technologies Co., Ltd. <fangying1@huawei.com>
 - pcie: Add pcie-root-port fast plug/unplug feature
@ -1,70 +0,0 @@
From 99026ec6a2735c6ff2f094ac247f558f14e3f3b9 Mon Sep 17 00:00:00 2001
From: Ying Fang <fangying1@huawei.com>
Date: Thu, 2 Apr 2020 15:53:47 +0800
Subject: [PATCH] util/async: Add memory barrier to aio_ctx_prepare

Qemu main thread is found to hang up in the mainloop when doing
image format convert on aarch64 platform and it is highly
reproduceable by executing test using:

qemu-img convert -f qcow2 -O qcow2 origin.qcow2 converted.qcow2

This mysterious hang can be explained by a race condition between
the main thread and an io worker thread. There can be a chance that
the last worker thread has called aio_bh_schedule_oneshot and it is
checking against notify_me to deliver a notfiy event. At the same
time, the main thread is calling aio_ctx_prepare however it first
calls qemu_timeout_ns_to_ms, thus the worker thread did not see
notify_me as true and did not send a notify event. The time line
can be shown in the following way:

Main Thread
------------------------------------------------
aio_ctx_prepare
    atomic_or(&ctx->notify_me, 1);
    /* out of order execution goes here */
    *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));

Worker Thread
-----------------------------------------------
aio_bh_schedule_oneshot -> aio_bh_enqueue
aio_notify
    smp_mb();
    if (ctx->notify_me) { /* worker thread checks notify_me here */
        event_notifier_set(&ctx->notifier);
        atomic_mb_set(&ctx->notified, true);
    }

Normal VM runtime is not affected by this hang since there is always some
timer timeout or subsequent io worker come and notify the main thead.
To fix this problem, a memory barrier is added to aio_ctx_prepare and
it is proved to have the hang fixed in our test.

This hang is not observed on the x86 platform however it can be easily
reproduced on the aarch64 platform, thus it is architecture related.
Not sure if this is revelant to Commit eabc977973103527bbb8fed69c91cfaa6691f8ab

Signed-off-by: Ying Fang <fangying1@huawei.com>
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Reported-by: Euler Robot <euler.robot@huawei.com>
---
 util/async.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/util/async.c b/util/async.c
index afc17fb3..50dfa9ce 100644
--- a/util/async.c
+++ b/util/async.c
@@ -222,7 +222,8 @@ aio_ctx_prepare(GSource *source, gint *timeout)
     AioContext *ctx = (AioContext *) source;

     atomic_or(&ctx->notify_me, 1);
-
+    /* Make sure notify_me is set before aio_compute_timeout */
+    smp_mb();
     /* We assume there is no timeout already supplied */
     *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));

--
2.23.0