!29 async-fix-qemu-hang
Merge pull request !29 from FangYing/async-fix-qemu-hang
This commit is contained in:
commit
df3f209a2a
@ -0,0 +1,120 @@
|
||||
From a2bae876b7f694b12073bac8ad6668e4d975ad88 Mon Sep 17 00:00:00 2001
|
||||
From: Ying Fang <fangying1@huawei.com>
|
||||
Date: Fri, 10 Apr 2020 16:08:19 +0000
|
||||
Subject: [PATCH 1/2] aio-wait: delegate polling of main AioContext if BQL not
|
||||
held
|
||||
|
||||
Any thread that is not an iothread returns NULL for qemu_get_current_aio_context().
|
||||
As a result, it would also return true for
|
||||
in_aio_context_home_thread(qemu_get_aio_context()), causing
|
||||
AIO_WAIT_WHILE to invoke aio_poll() directly. This is incorrect
|
||||
if the BQL is not held, because aio_poll() does not expect to
|
||||
run concurrently from multiple threads, and it can actually
|
||||
happen when savevm writes to the vmstate file from the
|
||||
migration thread.
|
||||
|
||||
Therefore, restrict in_aio_context_home_thread to return true
|
||||
for the main AioContext only if the BQL is held.
|
||||
|
||||
The function is moved to aio-wait.h because it is mostly used
|
||||
there and to avoid a circular reference between main-loop.h
|
||||
and block/aio.h.
|
||||
|
||||
upstream_url: https://patchwork.kernel.org/patch/11482099/
|
||||
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Message-Id: <20200407140746.8041-5-pbonzini@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
include/block/aio-wait.h | 22 ++++++++++++++++++++++
|
||||
include/block/aio.h | 29 ++++++++++-------------------
|
||||
2 files changed, 32 insertions(+), 19 deletions(-)
|
||||
|
||||
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
|
||||
index afd0ff7e..d349e7e3 100644
|
||||
--- a/include/block/aio-wait.h
|
||||
+++ b/include/block/aio-wait.h
|
||||
@@ -26,6 +26,7 @@
|
||||
#define QEMU_AIO_WAIT_H
|
||||
|
||||
#include "block/aio.h"
|
||||
+#include "qemu/main-loop.h"
|
||||
|
||||
/**
|
||||
* AioWait:
|
||||
@@ -124,4 +125,25 @@ void aio_wait_kick(void);
|
||||
*/
|
||||
void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
|
||||
|
||||
+/**
|
||||
+ * in_aio_context_home_thread:
|
||||
+ * @ctx: the aio context
|
||||
+ *
|
||||
+ * Return whether we are running in the thread that normally runs @ctx. Note
|
||||
+ * that acquiring/releasing ctx does not affect the outcome, each AioContext
|
||||
+ * still only has one home thread that is responsible for running it.
|
||||
+ */
|
||||
+static inline bool in_aio_context_home_thread(AioContext *ctx)
|
||||
+{
|
||||
+ if (ctx == qemu_get_current_aio_context()) {
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ if (ctx == qemu_get_aio_context()) {
|
||||
+ return qemu_mutex_iothread_locked();
|
||||
+ } else {
|
||||
+ return false;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
#endif /* QEMU_AIO_WAIT */
|
||||
diff --git a/include/block/aio.h b/include/block/aio.h
|
||||
index 0ca25dfe..c527893b 100644
|
||||
--- a/include/block/aio.h
|
||||
+++ b/include/block/aio.h
|
||||
@@ -61,12 +61,16 @@ struct AioContext {
|
||||
QLIST_HEAD(, AioHandler) aio_handlers;
|
||||
|
||||
/* Used to avoid unnecessary event_notifier_set calls in aio_notify;
|
||||
- * accessed with atomic primitives. If this field is 0, everything
|
||||
- * (file descriptors, bottom halves, timers) will be re-evaluated
|
||||
- * before the next blocking poll(), thus the event_notifier_set call
|
||||
- * can be skipped. If it is non-zero, you may need to wake up a
|
||||
- * concurrent aio_poll or the glib main event loop, making
|
||||
- * event_notifier_set necessary.
|
||||
+ * only written from the AioContext home thread, or under the BQL in
|
||||
+ * the case of the main AioContext. However, it is read from any
|
||||
+ * thread so it is still accessed with atomic primitives.
|
||||
+ *
|
||||
+ * If this field is 0, everything (file descriptors, bottom halves,
|
||||
+ * timers) will be re-evaluated before the next blocking poll() or
|
||||
+ * io_uring wait; therefore, the event_notifier_set call can be
|
||||
+ * skipped. If it is non-zero, you may need to wake up a concurrent
|
||||
+ * aio_poll or the glib main event loop, making event_notifier_set
|
||||
+ * necessary.
|
||||
*
|
||||
* Bit 0 is reserved for GSource usage of the AioContext, and is 1
|
||||
* between a call to aio_ctx_prepare and the next call to aio_ctx_check.
|
||||
@@ -581,19 +585,6 @@ void aio_co_enter(AioContext *ctx, struct Coroutine *co);
|
||||
*/
|
||||
AioContext *qemu_get_current_aio_context(void);
|
||||
|
||||
-/**
|
||||
- * in_aio_context_home_thread:
|
||||
- * @ctx: the aio context
|
||||
- *
|
||||
- * Return whether we are running in the thread that normally runs @ctx. Note
|
||||
- * that acquiring/releasing ctx does not affect the outcome, each AioContext
|
||||
- * still only has one home thread that is responsible for running it.
|
||||
- */
|
||||
-static inline bool in_aio_context_home_thread(AioContext *ctx)
|
||||
-{
|
||||
- return ctx == qemu_get_current_aio_context();
|
||||
-}
|
||||
-
|
||||
/**
|
||||
* aio_context_setup:
|
||||
* @ctx: the aio context
|
||||
--
|
||||
2.25.2
|
||||
|
||||
171
async-use-explicit-memory-barriers.patch
Normal file
171
async-use-explicit-memory-barriers.patch
Normal file
@ -0,0 +1,171 @@
|
||||
From 787af8ed2bc86dc8688727d62a251965d9c42e00 Mon Sep 17 00:00:00 2001
|
||||
From: Ying Fang <fangying1@huawei.com>
|
||||
Date: Fri, 10 Apr 2020 16:19:50 +0000
|
||||
Subject: [PATCH 2/2] async: use explicit memory barriers
|
||||
|
||||
When using C11 atomics, non-seqcst reads and writes do not participate
|
||||
in the total order of seqcst operations. In util/async.c and util/aio-posix.c,
|
||||
in particular, the pattern that we use
|
||||
|
||||
write ctx->notify_me write bh->scheduled
|
||||
read bh->scheduled read ctx->notify_me
|
||||
if !bh->scheduled, sleep if ctx->notify_me, notify
|
||||
|
||||
needs to use seqcst operations for both the write and the read. In
|
||||
general this is something that we do not want, because there can be
|
||||
many sources that are polled in addition to bottom halves. The
|
||||
alternative is to place a seqcst memory barrier between the write
|
||||
and the read. This also comes with a disadvantage, in that the
|
||||
memory barrier is implicit on strongly-ordered architectures and
|
||||
it wastes a few dozen clock cycles.
|
||||
|
||||
Fortunately, ctx->notify_me is never written concurrently by two
|
||||
threads, so we can assert that and relax the writes to ctx->notify_me.
|
||||
The resulting solution works and performs well on both aarch64 and x86.
|
||||
|
||||
Note that the atomic_set/atomic_read combination is not an atomic
|
||||
read-modify-write, and therefore it is even weaker than C11 ATOMIC_RELAXED;
|
||||
on x86, ATOMIC_RELAXED compiles to a locked operation.
|
||||
|
||||
upstream_url: https://patchwork.kernel.org/patch/11482103/
|
||||
|
||||
Analyzed-by: Ying Fang <fangying1@huawei.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Tested-by: Ying Fang <fangying1@huawei.com>
|
||||
Message-Id: <20200407140746.8041-6-pbonzini@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
util/aio-posix.c | 16 ++++++++++++++--
|
||||
util/aio-win32.c | 17 ++++++++++++++---
|
||||
util/async.c | 16 ++++++++++++----
|
||||
3 files changed, 40 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/util/aio-posix.c b/util/aio-posix.c
|
||||
index 6fbfa792..ca58b9a4 100644
|
||||
--- a/util/aio-posix.c
|
||||
+++ b/util/aio-posix.c
|
||||
@@ -613,6 +613,11 @@ bool aio_poll(AioContext *ctx, bool blocking)
|
||||
int64_t timeout;
|
||||
int64_t start = 0;
|
||||
|
||||
+ /*
|
||||
+ * There cannot be two concurrent aio_poll calls for the same AioContext (or
|
||||
+ * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
|
||||
+ * We rely on this below to avoid slow locked accesses to ctx->notify_me.
|
||||
+ */
|
||||
assert(in_aio_context_home_thread(ctx));
|
||||
|
||||
/* aio_notify can avoid the expensive event_notifier_set if
|
||||
@@ -623,7 +628,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
|
||||
* so disable the optimization now.
|
||||
*/
|
||||
if (blocking) {
|
||||
- atomic_add(&ctx->notify_me, 2);
|
||||
+ atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
|
||||
+ /*
|
||||
+ * Write ctx->notify_me before computing the timeout
|
||||
+ * (reading bottom half flags, etc.). Pairs with
|
||||
+ * smp_mb in aio_notify().
|
||||
+ */
|
||||
+ smp_mb();
|
||||
}
|
||||
|
||||
qemu_lockcnt_inc(&ctx->list_lock);
|
||||
@@ -668,7 +679,8 @@ bool aio_poll(AioContext *ctx, bool blocking)
|
||||
}
|
||||
|
||||
if (blocking) {
|
||||
- atomic_sub(&ctx->notify_me, 2);
|
||||
+ /* Finish the poll before clearing the flag. */
|
||||
+ atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
|
||||
aio_notify_accept(ctx);
|
||||
}
|
||||
|
||||
diff --git a/util/aio-win32.c b/util/aio-win32.c
|
||||
index a23b9c36..729d533f 100644
|
||||
--- a/util/aio-win32.c
|
||||
+++ b/util/aio-win32.c
|
||||
@@ -321,6 +321,12 @@ bool aio_poll(AioContext *ctx, bool blocking)
|
||||
int count;
|
||||
int timeout;
|
||||
|
||||
+ /*
|
||||
+ * There cannot be two concurrent aio_poll calls for the same AioContext (or
|
||||
+ * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
|
||||
+ * We rely on this below to avoid slow locked accesses to ctx->notify_me.
|
||||
+ */
|
||||
+ assert(in_aio_context_home_thread(ctx));
|
||||
progress = false;
|
||||
|
||||
/* aio_notify can avoid the expensive event_notifier_set if
|
||||
@@ -331,7 +337,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
|
||||
* so disable the optimization now.
|
||||
*/
|
||||
if (blocking) {
|
||||
- atomic_add(&ctx->notify_me, 2);
|
||||
+ atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
|
||||
+ /*
|
||||
+ * Write ctx->notify_me before computing the timeout
|
||||
+ * (reading bottom half flags, etc.). Pairs with
|
||||
+ * smp_mb in aio_notify().
|
||||
+ */
|
||||
+ smp_mb();
|
||||
}
|
||||
|
||||
qemu_lockcnt_inc(&ctx->list_lock);
|
||||
@@ -364,8 +376,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
|
||||
ret = WaitForMultipleObjects(count, events, FALSE, timeout);
|
||||
if (blocking) {
|
||||
assert(first);
|
||||
- assert(in_aio_context_home_thread(ctx));
|
||||
- atomic_sub(&ctx->notify_me, 2);
|
||||
+ atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
|
||||
aio_notify_accept(ctx);
|
||||
}
|
||||
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index afc17fb3..12b33204 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -221,7 +221,14 @@ aio_ctx_prepare(GSource *source, gint *timeout)
|
||||
{
|
||||
AioContext *ctx = (AioContext *) source;
|
||||
|
||||
- atomic_or(&ctx->notify_me, 1);
|
||||
+ atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) | 1);
|
||||
+
|
||||
+ /*
|
||||
+ * Write ctx->notify_me before computing the timeout
|
||||
+ * (reading bottom half flags, etc.). Pairs with
|
||||
+ * smp_mb in aio_notify().
|
||||
+ */
|
||||
+ smp_mb();
|
||||
|
||||
/* We assume there is no timeout already supplied */
|
||||
*timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
|
||||
@@ -239,7 +246,8 @@ aio_ctx_check(GSource *source)
|
||||
AioContext *ctx = (AioContext *) source;
|
||||
QEMUBH *bh;
|
||||
|
||||
- atomic_and(&ctx->notify_me, ~1);
|
||||
+ /* Finish computing the timeout before clearing the flag. */
|
||||
+ atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) & ~1);
|
||||
aio_notify_accept(ctx);
|
||||
|
||||
for (bh = ctx->first_bh; bh; bh = bh->next) {
|
||||
@@ -344,10 +352,10 @@ LinuxAioState *aio_get_linux_aio(AioContext *ctx)
|
||||
void aio_notify(AioContext *ctx)
|
||||
{
|
||||
/* Write e.g. bh->scheduled before reading ctx->notify_me. Pairs
|
||||
- * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll.
|
||||
+ * with smp_mb in aio_ctx_prepare or aio_poll.
|
||||
*/
|
||||
smp_mb();
|
||||
- if (ctx->notify_me) {
|
||||
+ if (atomic_read(&ctx->notify_me)) {
|
||||
event_notifier_set(&ctx->notifier);
|
||||
atomic_mb_set(&ctx->notified, true);
|
||||
}
|
||||
--
|
||||
2.25.2
|
||||
|
||||
@ -61,7 +61,8 @@ Patch0048: COLO-compare-Fix-incorrect-if-logic.patch
|
||||
Patch0049: qcow2-bitmap-Fix-uint64_t-left-shift-overflow.patch
|
||||
Patch0050: pcie-Add-pcie-root-port-fast-plug-unplug-feature.patch
|
||||
Patch0051: pcie-Compat-with-devices-which-do-not-support-Link-W.patch
|
||||
Patch0052: util-async-Add-memory-barrier-to-aio_ctx_prepare.patch
|
||||
Patch0052: aio-wait-delegate-polling-of-main-AioContext-if-BQL-not-held.patch
|
||||
Patch0053: async-use-explicit-memory-barriers.patch
|
||||
|
||||
BuildRequires: flex
|
||||
BuildRequires: bison
|
||||
@ -397,8 +398,9 @@ getent passwd qemu >/dev/null || \
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Thu Apr 2 2020 Huawei Technologies Co., Ltd. <fangying1@huawei.com>
|
||||
util/async: Add memory barrier to aio_ctx_prepare
|
||||
* Sat Apr 11 2020 Huawei Technologies Co., Ltd. <fangying1@huawei.com>
|
||||
- aio-wait: delegate polling of main AioContext if BQL not held
|
||||
- async: use explicit memory barriers
|
||||
|
||||
* Wed Mar 18 2020 Huawei Technologies Co., Ltd. <fangying1@huawei.com>
|
||||
- pcie: Add pcie-root-port fast plug/unplug feature
|
||||
|
||||
@ -1,70 +0,0 @@
|
||||
From 99026ec6a2735c6ff2f094ac247f558f14e3f3b9 Mon Sep 17 00:00:00 2001
|
||||
From: Ying Fang <fangying1@huawei.com>
|
||||
Date: Thu, 2 Apr 2020 15:53:47 +0800
|
||||
Subject: [PATCH] util/async: Add memory barrier to aio_ctx_prepare
|
||||
|
||||
Qemu main thread is found to hang up in the mainloop when doing
|
||||
image format convert on aarch64 platform and it is highly
|
||||
reproduceable by executing test using:
|
||||
|
||||
qemu-img convert -f qcow2 -O qcow2 origin.qcow2 converted.qcow2
|
||||
|
||||
This mysterious hang can be explained by a race condition between
|
||||
the main thread and an io worker thread. There can be a chance that
|
||||
the last worker thread has called aio_bh_schedule_oneshot and it is
|
||||
checking against notify_me to deliver a notfiy event. At the same
|
||||
time, the main thread is calling aio_ctx_prepare however it first
|
||||
calls qemu_timeout_ns_to_ms, thus the worker thread did not see
|
||||
notify_me as true and did not send a notify event. The time line
|
||||
can be shown in the following way:
|
||||
|
||||
Main Thread
|
||||
------------------------------------------------
|
||||
aio_ctx_prepare
|
||||
atomic_or(&ctx->notify_me, 1);
|
||||
/* out of order execution goes here */
|
||||
*timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
|
||||
|
||||
Worker Thread
|
||||
-----------------------------------------------
|
||||
aio_bh_schedule_oneshot -> aio_bh_enqueue
|
||||
aio_notify
|
||||
smp_mb();
|
||||
if (ctx->notify_me) { /* worker thread checks notify_me here */
|
||||
event_notifier_set(&ctx->notifier);
|
||||
atomic_mb_set(&ctx->notified, true);
|
||||
}
|
||||
|
||||
Normal VM runtime is not affected by this hang since there is always some
|
||||
timer timeout or subsequent io worker come and notify the main thead.
|
||||
To fix this problem, a memory barrier is added to aio_ctx_prepare and
|
||||
it is proved to have the hang fixed in our test.
|
||||
|
||||
This hang is not observed on the x86 platform however it can be easily
|
||||
reproduced on the aarch64 platform, thus it is architecture related.
|
||||
Not sure if this is revelant to Commit eabc977973103527bbb8fed69c91cfaa6691f8ab
|
||||
|
||||
Signed-off-by: Ying Fang <fangying1@huawei.com>
|
||||
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
|
||||
Reported-by: Euler Robot <euler.robot@huawei.com>
|
||||
---
|
||||
util/async.c | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index afc17fb3..50dfa9ce 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -222,7 +222,8 @@ aio_ctx_prepare(GSource *source, gint *timeout)
|
||||
AioContext *ctx = (AioContext *) source;
|
||||
|
||||
atomic_or(&ctx->notify_me, 1);
|
||||
-
|
||||
+ /* Make sure notify_me is set before aio_compute_timeout */
|
||||
+ smp_mb();
|
||||
/* We assume there is no timeout already supplied */
|
||||
*timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
|
||||
|
||||
--
|
||||
2.23.0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user