fix some IO hang bugs
scsi-bus: fix incorrect call for blk_error_retry_reset_timeout() Revert "monitor: limit io error qmp event to at most once per 60s" Signed-off-by: Yan Wang <wangyan122@huawei.com> Signed-off-by: yezengruan <yezengruan@huawei.com>
This commit is contained in:
parent
f0684b551e
commit
90f33cc473
31
Revert-monitor-limit-io-error-qmp-event-to-at-most-o.patch
Normal file
31
Revert-monitor-limit-io-error-qmp-event-to-at-most-o.patch
Normal file
@ -0,0 +1,31 @@
|
||||
From e42b57adeac96c7d39b1c032ab3b66b7eff18cc8 Mon Sep 17 00:00:00 2001
|
||||
From: Yan Wang <wangyan122@huawei.com>
|
||||
Date: Tue, 29 Mar 2022 15:18:56 +0800
|
||||
Subject: [PATCH 2/2] Revert "monitor: limit io error qmp event to at most once
|
||||
per 60s"
|
||||
|
||||
This reverts commit 44f45b5c163efed5387dac40e229e0a50bf5921a.
|
||||
|
||||
The commit 44f45b5c will reduce the IO-hang related log, which
|
||||
is useful to solve the problem.
|
||||
|
||||
Signed-off-by: Yan Wang <wangyan122@huawei.com>
|
||||
---
|
||||
monitor/monitor.c | 1 -
|
||||
1 file changed, 1 deletion(-)
|
||||
|
||||
diff --git a/monitor/monitor.c b/monitor/monitor.c
|
||||
index 28206bedc4..257ef4ee54 100644
|
||||
--- a/monitor/monitor.c
|
||||
+++ b/monitor/monitor.c
|
||||
@@ -301,7 +301,6 @@ static MonitorQAPIEventConf monitor_qapi_event_conf[QAPI_EVENT__MAX] = {
|
||||
[QAPI_EVENT_QUORUM_FAILURE] = { 1000 * SCALE_MS },
|
||||
[QAPI_EVENT_VSERPORT_CHANGE] = { 1000 * SCALE_MS },
|
||||
[QAPI_EVENT_MEMORY_DEVICE_SIZE_CHANGE] = { 1000 * SCALE_MS },
|
||||
- [QAPI_EVENT_BLOCK_IO_ERROR] = { 60L * 1000 * SCALE_MS },
|
||||
};
|
||||
|
||||
/*
|
||||
--
|
||||
2.27.0
|
||||
|
||||
80
scsi-bus-fix-incorrect-call-for-blk_error_retry_rese.patch
Normal file
80
scsi-bus-fix-incorrect-call-for-blk_error_retry_rese.patch
Normal file
@ -0,0 +1,80 @@
|
||||
From 3ab10a5ad9bf1cbf3b4603f5a930a7924a07ad5a Mon Sep 17 00:00:00 2001
|
||||
From: Yan Wang <wangyan122@huawei.com>
|
||||
Date: Tue, 29 Mar 2022 12:05:56 +0800
|
||||
Subject: [PATCH 1/2] scsi-bus: fix incorrect call for
|
||||
blk_error_retry_reset_timeout()
|
||||
|
||||
Fix commit 52115ca0 ("scsi-disk: Add support for retry on errors").
|
||||
Call Stack:
|
||||
...
|
||||
scsi_read_data()
|
||||
scsi_do_read(r, 0)
|
||||
scsi_disk_req_check_error()
|
||||
blk_error_retry_reset_timeout()
|
||||
blk->retry_start_time = 0;
|
||||
|
||||
It will cause IO hang when the storage network is disconnected. Before the
|
||||
storage network has recovered, the call stack above will reset the
|
||||
retry_start_time, and cause the next IO operation not to return immediately.
|
||||
|
||||
Signed-off-by: Yan Wang <wangyan122@huawei.com>
|
||||
---
|
||||
hw/scsi/scsi-disk.c | 20 ++++++++++++++++----
|
||||
1 file changed, 16 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
|
||||
index 8661932a15..a66d2b0a98 100644
|
||||
--- a/hw/scsi/scsi-disk.c
|
||||
+++ b/hw/scsi/scsi-disk.c
|
||||
@@ -255,10 +255,8 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed)
|
||||
}
|
||||
}
|
||||
|
||||
-static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed)
|
||||
+static bool scsi_disk_req_handle_error(SCSIDiskReq *r, int ret, bool acct_failed)
|
||||
{
|
||||
- SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
|
||||
-
|
||||
if (r->req.io_canceled) {
|
||||
scsi_req_cancel_complete(&r->req);
|
||||
return true;
|
||||
@@ -268,6 +266,17 @@ static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed)
|
||||
return scsi_handle_rw_error(r, ret, acct_failed);
|
||||
}
|
||||
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed)
|
||||
+{
|
||||
+ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
|
||||
+
|
||||
+ if (r->req.io_canceled || ret < 0) {
|
||||
+ return scsi_disk_req_handle_error(r, ret, acct_failed);
|
||||
+ }
|
||||
+
|
||||
blk_error_retry_reset_timeout(s->qdev.conf.blk);
|
||||
return false;
|
||||
}
|
||||
@@ -418,7 +427,7 @@ static void scsi_do_read(SCSIDiskReq *r, int ret)
|
||||
SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
|
||||
|
||||
assert (r->req.aiocb == NULL);
|
||||
- if (scsi_disk_req_check_error(r, ret, false)) {
|
||||
+ if (scsi_disk_req_handle_error(r, ret, false)) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
@@ -458,6 +467,9 @@ static void scsi_do_read_cb(void *opaque, int ret)
|
||||
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
|
||||
} else {
|
||||
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
|
||||
+ if (!r->req.io_canceled) {
|
||||
+ blk_error_retry_reset_timeout(s->qdev.conf.blk);
|
||||
+ }
|
||||
}
|
||||
scsi_do_read(opaque, ret);
|
||||
aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
|
||||
--
|
||||
2.27.0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user