diff --git a/Revert-monitor-limit-io-error-qmp-event-to-at-most-o.patch b/Revert-monitor-limit-io-error-qmp-event-to-at-most-o.patch new file mode 100644 index 0000000..112161f --- /dev/null +++ b/Revert-monitor-limit-io-error-qmp-event-to-at-most-o.patch @@ -0,0 +1,31 @@ +From e42b57adeac96c7d39b1c032ab3b66b7eff18cc8 Mon Sep 17 00:00:00 2001 +From: Yan Wang +Date: Tue, 29 Mar 2022 15:18:56 +0800 +Subject: [PATCH 2/2] Revert "monitor: limit io error qmp event to at most once + per 60s" + +This reverts commit 44f45b5c163efed5387dac40e229e0a50bf5921a. + +The commit 44f45b5c will reduse the IO-hang related log, which +is useful to solve the problem. + +Signed-off-by: Yan Wang +--- + monitor/monitor.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/monitor/monitor.c b/monitor/monitor.c +index 28206bedc4..257ef4ee54 100644 +--- a/monitor/monitor.c ++++ b/monitor/monitor.c +@@ -301,7 +301,6 @@ static MonitorQAPIEventConf monitor_qapi_event_conf[QAPI_EVENT__MAX] = { + [QAPI_EVENT_QUORUM_FAILURE] = { 1000 * SCALE_MS }, + [QAPI_EVENT_VSERPORT_CHANGE] = { 1000 * SCALE_MS }, + [QAPI_EVENT_MEMORY_DEVICE_SIZE_CHANGE] = { 1000 * SCALE_MS }, +- [QAPI_EVENT_BLOCK_IO_ERROR] = { 60L * 1000 * SCALE_MS }, + }; + + /* +-- +2.27.0 + diff --git a/scsi-bus-fix-incorrect-call-for-blk_error_retry_rese.patch b/scsi-bus-fix-incorrect-call-for-blk_error_retry_rese.patch new file mode 100644 index 0000000..4f4804f --- /dev/null +++ b/scsi-bus-fix-incorrect-call-for-blk_error_retry_rese.patch @@ -0,0 +1,80 @@ +From 3ab10a5ad9bf1cbf3b4603f5a930a7924a07ad5a Mon Sep 17 00:00:00 2001 +From: Yan Wang +Date: Tue, 29 Mar 2022 12:05:56 +0800 +Subject: [PATCH 1/2] scsi-bus: fix incorrect call for + blk_error_retry_reset_timeout() + +Fix commit 52115ca0("scsi-disk: Add support for retry on errors"). +Call Stack: + ... + scsi_read_data() + scsi_do_read(r, 0) + scsi_disk_req_check_error() + blk_error_retry_reset_timeout() + blk->retry_start_time = 0; + +It will cause IO hang when storage network disconnected. Before the +storage network recovered, the upper call stack will reset the +retry_start_time, and cause the next IO operation not returned immediately. + +Signed-off-by: Yan Wang +--- + hw/scsi/scsi-disk.c | 20 ++++++++++++++++---- + 1 file changed, 16 insertions(+), 4 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 8661932a15..a66d2b0a98 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -255,10 +255,8 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed) + } + } + +-static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) ++static bool scsi_disk_req_handle_error(SCSIDiskReq *r, int ret, bool acct_failed) + { +- SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); +- + if (r->req.io_canceled) { + scsi_req_cancel_complete(&r->req); + return true; +@@ -268,6 +266,17 @@ static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) + return scsi_handle_rw_error(r, ret, acct_failed); + } + ++ return false; ++} ++ ++static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) ++{ ++ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); ++ ++ if (r->req.io_canceled || ret < 0) { ++ return scsi_disk_req_handle_error(r, ret, acct_failed); ++ } ++ + blk_error_retry_reset_timeout(s->qdev.conf.blk); + return false; + } +@@ -418,7 +427,7 @@ static void scsi_do_read(SCSIDiskReq *r, int ret) + SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); + + assert (r->req.aiocb == NULL); +- if (scsi_disk_req_check_error(r, ret, false)) { ++ if (scsi_disk_req_handle_error(r, ret, false)) { + goto done; + } + +@@ -458,6 +467,9 @@ static void scsi_do_read_cb(void *opaque, int ret) + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } else { + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); ++ if (!r->req.io_canceled) { ++ blk_error_retry_reset_timeout(s->qdev.conf.blk); ++ } + } + scsi_do_read(opaque, ret); + aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); +-- +2.27.0 +