From 1e4b6553e38c8683a9194247e6dad4b63107a13c Mon Sep 17 00:00:00 2001 From: Ying Fang Date: Fri, 17 Apr 2020 18:12:50 +0800 Subject: [PATCH] Backport: backport from upstream stable v4.1.1 This patch backports bugfix patch series from qemu upstream v4.1.1 Signed-off-by: Ying Fang --- ...Check-for-ECANCELED-in-aio-callbacks.patch | 88 ++++ block-Add-bdrv_co_get_self_request.patch | 59 +++ ...ait-mark-serialising-requests-public.patch | 131 +++++ ...not-abort-if-a-block-driver-is-not-a.patch | 95 ++++ ...six-Let-post-EOF-fallocate-serialize.patch | 69 +++ block-file-posix-Reduce-xfsctl-use.patch | 165 ++++++ block-io-refactor-padding.patch | 481 ++++++++++++++++++ ...k-nfs-tear-down-aio-before-nfs_close.patch | 41 ++ ...osix-Always-allocate-the-first-block.patch | 343 +++++++++++++ ...corruption-introduced-by-commit-8ac0.patch | 66 +++ ...pshot-Restrict-set-of-snapshot-nodes.patch | 124 +++++ ...e-nodes-head-while-removing-all-bdrv.patch | 61 +++ ...tine-Add-qemu_co_mutex_assert_locked.patch | 50 ++ ...re-AIO-callback-is-invoked-after-can.patch | 79 +++ ...ap-handle-set-reset-with-zero-length.patch | 50 ++ ...t-NSACR.-CP11-CP10-for-NS-kernel-boo.patch | 47 ++ ...oader-Fix-possible-crash-in-rom_copy.patch | 45 ++ ...host-user-fix-SLAVE_SEND_FD-handling.patch | 42 ++ ...l-in-edk2-submodules-so-we-can-build.patch | 60 +++ ...or_top_bs-drained-after-dropping-per.patch | 52 ++ ...ke-die-id-mandatory-unless-necessary.patch | 102 ++++ pr-manager-Fix-invalid-g_free-crash-bug.patch | 39 ++ qcow2-Fix-QCOW2_COMPRESSED_SECTOR_MASK.patch | 35 ++ ...tion-bug-in-qcow2_detect_metadata_pr.patch | 71 +++ ...lculation-of-the-maximum-L2-cache-si.patch | 58 +++ qemu.spec | 43 ++ ...k2-don-t-pull-in-submodules-when-bui.patch | 54 ++ ...finite-loop-while-executing-script-C.patch | 104 ++++ ...-reading-flags-from-FPSCR-for-M-prof.patch | 41 ++ ...-abort-on-M-profile-exception-return.patch | 103 ++++ ...m-Free-TCG-temps-in-trans_VMOV_64_sp.patch | 40 ++++ 
...p-Cocoa-display-on-macOS-10.15-Catal.patch | 62 +++ util-hbitmap-strict-hbitmap_reset.patch | 77 +++ util-iov-improve-qemu_iovec_is_zero.patch | 102 ++++ ...v-introduce-qemu_iovec_init_extended.patch | 177 +++++++ ...e-features-if-the-char-dev-is-closed.patch | 42 ++ ...l-the-pending-BH-when-the-dataplane-.patch | 80 +++ ...-prevent-offloads-reset-on-migration.patch | 122 +++++ virtio-new-post_load-hook.patch | 63 +++ ...turn-0-from-vpc_co_create-on-success.patch | 49 ++ ...tise-die-id-in-query-hotpluggbale-cp.patch | 60 +++ 41 files changed, 3672 insertions(+) create mode 100644 Revert-ide-ahci-Check-for-ECANCELED-in-aio-callbacks.patch create mode 100644 block-Add-bdrv_co_get_self_request.patch create mode 100644 block-Make-wait-mark-serialising-requests-public.patch create mode 100644 block-create-Do-not-abort-if-a-block-driver-is-not-a.patch create mode 100644 block-file-posix-Let-post-EOF-fallocate-serialize.patch create mode 100644 block-file-posix-Reduce-xfsctl-use.patch create mode 100644 block-io-refactor-padding.patch create mode 100644 block-nfs-tear-down-aio-before-nfs_close.patch create mode 100644 block-posix-Always-allocate-the-first-block.patch create mode 100644 block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch create mode 100644 block-snapshot-Restrict-set-of-snapshot-nodes.patch create mode 100644 blockjob-update-nodes-head-while-removing-all-bdrv.patch create mode 100644 coroutine-Add-qemu_co_mutex_assert_locked.patch create mode 100644 dma-helpers-ensure-AIO-callback-is-invoked-after-can.patch create mode 100644 hbitmap-handle-set-reset-with-zero-length.patch create mode 100644 hw-arm-boot.c-Set-NSACR.-CP11-CP10-for-NS-kernel-boo.patch create mode 100644 hw-core-loader-Fix-possible-crash-in-rom_copy.patch create mode 100644 libvhost-user-fix-SLAVE_SEND_FD-handling.patch create mode 100644 make-release-pull-in-edk2-submodules-so-we-can-build.patch create mode 100644 mirror-Keep-mirror_top_bs-drained-after-dropping-per.patch create 
mode 100644 pc-Don-t-make-die-id-mandatory-unless-necessary.patch create mode 100644 pr-manager-Fix-invalid-g_free-crash-bug.patch create mode 100644 qcow2-Fix-QCOW2_COMPRESSED_SECTOR_MASK.patch create mode 100644 qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch create mode 100644 qcow2-Fix-the-calculation-of-the-maximum-L2-cache-si.patch create mode 100644 roms-Makefile.edk2-don-t-pull-in-submodules-when-bui.patch create mode 100644 scsi-lsi-exit-infinite-loop-while-executing-script-C.patch create mode 100644 target-arm-Allow-reading-flags-from-FPSCR-for-M-prof.patch create mode 100644 target-arm-Don-t-abort-on-M-profile-exception-return.patch create mode 100644 target-arm-Free-TCG-temps-in-trans_VMOV_64_sp.patch create mode 100644 ui-Fix-hanging-up-Cocoa-display-on-macOS-10.15-Catal.patch create mode 100644 util-hbitmap-strict-hbitmap_reset.patch create mode 100644 util-iov-improve-qemu_iovec_is_zero.patch create mode 100644 util-iov-introduce-qemu_iovec_init_extended.patch create mode 100644 vhost-user-save-features-if-the-char-dev-is-closed.patch create mode 100644 virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch create mode 100644 virtio-net-prevent-offloads-reset-on-migration.patch create mode 100644 virtio-new-post_load-hook.patch create mode 100644 vpc-Return-0-from-vpc_co_create-on-success.patch create mode 100644 x86-do-not-advertise-die-id-in-query-hotpluggbale-cp.patch diff --git a/Revert-ide-ahci-Check-for-ECANCELED-in-aio-callbacks.patch b/Revert-ide-ahci-Check-for-ECANCELED-in-aio-callbacks.patch new file mode 100644 index 0000000..200e0b2 --- /dev/null +++ b/Revert-ide-ahci-Check-for-ECANCELED-in-aio-callbacks.patch @@ -0,0 +1,88 @@ +From 73a5bf472921068e6db10e7e325b7ac46f111834 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Mon, 29 Jul 2019 18:36:05 -0400 +Subject: [PATCH] Revert "ide/ahci: Check for -ECANCELED in aio callbacks" + +This reverts commit 0d910cfeaf2076b116b4517166d5deb0fea76394. 
+ +It's not correct to just ignore an error code in a callback; we need to +handle that error and possible report failure to the guest so that they +don't wait indefinitely for an operation that will now never finish. + +This ought to help cases reported by Nutanix where iSCSI returns a +legitimate -ECANCELED for certain operations which should be propagated +normally. + +Reported-by: Shaju Abraham +Signed-off-by: John Snow +Message-id: 20190729223605.7163-1-jsnow@redhat.com +Signed-off-by: John Snow +(cherry picked from commit 8ec41c4265714255d5a138f8b538faf3583dcff6) +Signed-off-by: Michael Roth +--- + hw/ide/ahci.c | 3 --- + hw/ide/core.c | 14 -------------- + 2 files changed, 17 deletions(-) + +diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c +index 00ba422a48..6aaf66534a 100644 +--- a/hw/ide/ahci.c ++++ b/hw/ide/ahci.c +@@ -1023,9 +1023,6 @@ static void ncq_cb(void *opaque, int ret) + IDEState *ide_state = &ncq_tfs->drive->port.ifs[0]; + + ncq_tfs->aiocb = NULL; +- if (ret == -ECANCELED) { +- return; +- } + + if (ret < 0) { + bool is_read = ncq_tfs->cmd == READ_FPDMA_QUEUED; +diff --git a/hw/ide/core.c b/hw/ide/core.c +index 6afadf894f..8e1624f7ce 100644 +--- a/hw/ide/core.c ++++ b/hw/ide/core.c +@@ -722,9 +722,6 @@ static void ide_sector_read_cb(void *opaque, int ret) + s->pio_aiocb = NULL; + s->status &= ~BUSY_STAT; + +- if (ret == -ECANCELED) { +- return; +- } + if (ret != 0) { + if (ide_handle_rw_error(s, -ret, IDE_RETRY_PIO | + IDE_RETRY_READ)) { +@@ -840,10 +837,6 @@ static void ide_dma_cb(void *opaque, int ret) + uint64_t offset; + bool stay_active = false; + +- if (ret == -ECANCELED) { +- return; +- } +- + if (ret == -EINVAL) { + ide_dma_error(s); + return; +@@ -975,10 +968,6 @@ static void ide_sector_write_cb(void *opaque, int ret) + IDEState *s = opaque; + int n; + +- if (ret == -ECANCELED) { +- return; +- } +- + s->pio_aiocb = NULL; + s->status &= ~BUSY_STAT; + +@@ -1058,9 +1047,6 @@ static void ide_flush_cb(void *opaque, int ret) + + s->pio_aiocb = NULL; 
+ +- if (ret == -ECANCELED) { +- return; +- } + if (ret < 0) { + /* XXX: What sector number to set here? */ + if (ide_handle_rw_error(s, -ret, IDE_RETRY_FLUSH)) { +-- +2.23.0 diff --git a/block-Add-bdrv_co_get_self_request.patch b/block-Add-bdrv_co_get_self_request.patch new file mode 100644 index 0000000..4972f08 --- /dev/null +++ b/block-Add-bdrv_co_get_self_request.patch @@ -0,0 +1,59 @@ +From d9b88f7e0d56feb4d7daa2506e2756fc48e975a1 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Fri, 1 Nov 2019 16:25:09 +0100 +Subject: [PATCH] block: Add bdrv_co_get_self_request() + +Cc: qemu-stable@nongnu.org +Signed-off-by: Max Reitz +Message-id: 20191101152510.11719-3-mreitz@redhat.com +Signed-off-by: Max Reitz +(cherry picked from commit c28107e9e55b11cd35cf3dc2505e3e69d10dcf13) +Signed-off-by: Michael Roth +--- + block/io.c | 18 ++++++++++++++++++ + include/block/block_int.h | 1 + + 2 files changed, 19 insertions(+) + +diff --git a/block/io.c b/block/io.c +index d4ceaaa2ce..65b5102714 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -721,6 +721,24 @@ static bool is_request_serialising_and_aligned(BdrvTrackedRequest *req) + (req->bytes == req->overlap_bytes); + } + ++/** ++ * Return the tracked request on @bs for the current coroutine, or ++ * NULL if there is none. 
++ */ ++BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs) ++{ ++ BdrvTrackedRequest *req; ++ Coroutine *self = qemu_coroutine_self(); ++ ++ QLIST_FOREACH(req, &bs->tracked_requests, list) { ++ if (req->co == self) { ++ return req; ++ } ++ } ++ ++ return NULL; ++} ++ + /** + * Round a region to cluster boundaries + */ +diff --git a/include/block/block_int.h b/include/block/block_int.h +index 4465b02242..05ee6b4866 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -964,6 +964,7 @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); + + bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self); + void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align); ++BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs); + + int get_tmp_filename(char *filename, int size); + BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, +-- +2.23.0 diff --git a/block-Make-wait-mark-serialising-requests-public.patch b/block-Make-wait-mark-serialising-requests-public.patch new file mode 100644 index 0000000..162463c --- /dev/null +++ b/block-Make-wait-mark-serialising-requests-public.patch @@ -0,0 +1,131 @@ +From 590cff8230749794ba09b38f3ea4eb6b0f2f73b5 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Fri, 1 Nov 2019 16:25:08 +0100 +Subject: [PATCH] block: Make wait/mark serialising requests public + +Make both bdrv_mark_request_serialising() and +bdrv_wait_serialising_requests() public so they can be used from block +drivers. 
+ +Cc: qemu-stable@nongnu.org +Signed-off-by: Max Reitz +Message-id: 20191101152510.11719-2-mreitz@redhat.com +Signed-off-by: Max Reitz +(cherry picked from commit 304d9d7f034ff7f5e1e66a65b7f720f63a72c57e) + Conflicts: + block/io.c +*drop context dependency on 1acc3466a2 +Signed-off-by: Michael Roth +--- + block/io.c | 24 ++++++++++++------------ + include/block/block_int.h | 3 +++ + 2 files changed, 15 insertions(+), 12 deletions(-) + +diff --git a/block/io.c b/block/io.c +index 07d2d825c3..d4ceaaa2ce 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -694,7 +694,7 @@ static void tracked_request_begin(BdrvTrackedRequest *req, + qemu_co_mutex_unlock(&bs->reqs_lock); + } + +-static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) ++void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) + { + int64_t overlap_offset = req->offset & ~(align - 1); + uint64_t overlap_bytes = ROUND_UP(req->offset + req->bytes, align) +@@ -784,7 +784,7 @@ void bdrv_dec_in_flight(BlockDriverState *bs) + bdrv_wakeup(bs); + } + +-static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self) ++bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self) + { + BlockDriverState *bs = self->bs; + BdrvTrackedRequest *req; +@@ -1340,14 +1340,14 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, + * with each other for the same cluster. For example, in copy-on-read + * it ensures that the CoR read and write operations are atomic and + * guest writes cannot interleave between them. 
*/ +- mark_request_serialising(req, bdrv_get_cluster_size(bs)); ++ bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs)); + } + + /* BDRV_REQ_SERIALISING is only for write operation */ + assert(!(flags & BDRV_REQ_SERIALISING)); + + if (!(flags & BDRV_REQ_NO_SERIALISING)) { +- wait_serialising_requests(req); ++ bdrv_wait_serialising_requests(req); + } + + if (flags & BDRV_REQ_COPY_ON_READ) { +@@ -1736,10 +1736,10 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes, + assert(!(flags & ~BDRV_REQ_MASK)); + + if (flags & BDRV_REQ_SERIALISING) { +- mark_request_serialising(req, bdrv_get_cluster_size(bs)); ++ bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs)); + } + +- waited = wait_serialising_requests(req); ++ waited = bdrv_wait_serialising_requests(req); + + assert(!waited || !req->serialising || + is_request_serialising_and_aligned(req)); +@@ -1905,8 +1905,8 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, + + padding = bdrv_init_padding(bs, offset, bytes, &pad); + if (padding) { +- mark_request_serialising(req, align); +- wait_serialising_requests(req); ++ bdrv_mark_request_serialising(req, align); ++ bdrv_wait_serialising_requests(req); + + bdrv_padding_rmw_read(child, req, &pad, true); + +@@ -1993,8 +1993,8 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, + } + + if (bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad)) { +- mark_request_serialising(&req, align); +- wait_serialising_requests(&req); ++ bdrv_mark_request_serialising(&req, align); ++ bdrv_wait_serialising_requests(&req); + bdrv_padding_rmw_read(child, &req, &pad, false); + } + +@@ -3078,7 +3078,7 @@ static int coroutine_fn bdrv_co_copy_range_internal( + /* BDRV_REQ_SERIALISING is only for write operation */ + assert(!(read_flags & BDRV_REQ_SERIALISING)); + if (!(read_flags & BDRV_REQ_NO_SERIALISING)) { +- wait_serialising_requests(&req); ++ bdrv_wait_serialising_requests(&req); + } + + ret = 
src->bs->drv->bdrv_co_copy_range_from(src->bs, +@@ -3205,7 +3205,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, + * new area, we need to make sure that no write requests are made to it + * concurrently or they might be overwritten by preallocation. */ + if (new_bytes) { +- mark_request_serialising(&req, 1); ++ bdrv_mark_request_serialising(&req, 1); + } + if (bs->read_only) { + error_setg(errp, "Image is read-only"); +diff --git a/include/block/block_int.h b/include/block/block_int.h +index 3aa1e832a8..4465b02242 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -962,6 +962,9 @@ extern unsigned int bdrv_drain_all_count; + void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); + void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); + ++bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self); ++void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align); ++ + int get_tmp_filename(char *filename, int size); + BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, + const char *filename); +-- +2.23.0 diff --git a/block-create-Do-not-abort-if-a-block-driver-is-not-a.patch b/block-create-Do-not-abort-if-a-block-driver-is-not-a.patch new file mode 100644 index 0000000..73d2b9d --- /dev/null +++ b/block-create-Do-not-abort-if-a-block-driver-is-not-a.patch @@ -0,0 +1,95 @@ +From 088f1e8fd9e790bc5766bd43af134230abcff6dd Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Thu, 12 Sep 2019 00:08:49 +0200 +Subject: [PATCH] block/create: Do not abort if a block driver is not available +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The 'blockdev-create' QMP command was introduced as experimental +feature in commit b0292b851b8, using the assert() debug call. +It got promoted to 'stable' command in 3fb588a0f2c, but the +assert call was not removed. 
+ +Some block drivers are optional, and bdrv_find_format() might +return a NULL value, triggering the assertion. + +Stable code is not expected to abort, so return an error instead. + +This is easily reproducible when libnfs is not installed: + + ./configure + [...] + module support no + Block whitelist (rw) + Block whitelist (ro) + libiscsi support yes + libnfs support no + [...] + +Start QEMU: + + $ qemu-system-x86_64 -S -qmp unix:/tmp/qemu.qmp,server,nowait + +Send the 'blockdev-create' with the 'nfs' driver: + + $ ( cat << 'EOF' + {'execute': 'qmp_capabilities'} + {'execute': 'blockdev-create', 'arguments': {'job-id': 'x', 'options': {'size': 0, 'driver': 'nfs', 'location': {'path': '/', 'server': {'host': '::1', 'type': 'inet'}}}}, 'id': 'x'} + EOF + ) | socat STDIO UNIX:/tmp/qemu.qmp + {"QMP": {"version": {"qemu": {"micro": 50, "minor": 1, "major": 4}, "package": "v4.1.0-733-g89ea03a7dc"}, "capabilities": ["oob"]}} + {"return": {}} + +QEMU crashes: + + $ gdb qemu-system-x86_64 core + Program received signal SIGSEGV, Segmentation fault. 
+ (gdb) bt + #0 0x00007ffff510957f in raise () at /lib64/libc.so.6 + #1 0x00007ffff50f3895 in abort () at /lib64/libc.so.6 + #2 0x00007ffff50f3769 in _nl_load_domain.cold.0 () at /lib64/libc.so.6 + #3 0x00007ffff5101a26 in .annobin_assert.c_end () at /lib64/libc.so.6 + #4 0x0000555555d7e1f1 in qmp_blockdev_create (job_id=0x555556baee40 "x", options=0x555557666610, errp=0x7fffffffc770) at block/create.c:69 + #5 0x0000555555c96b52 in qmp_marshal_blockdev_create (args=0x7fffdc003830, ret=0x7fffffffc7f8, errp=0x7fffffffc7f0) at qapi/qapi-commands-block-core.c:1314 + #6 0x0000555555deb0a0 in do_qmp_dispatch (cmds=0x55555645de70 , request=0x7fffdc005c70, allow_oob=false, errp=0x7fffffffc898) at qapi/qmp-dispatch.c:131 + #7 0x0000555555deb2a1 in qmp_dispatch (cmds=0x55555645de70 , request=0x7fffdc005c70, allow_oob=false) at qapi/qmp-dispatch.c:174 + +With this patch applied, QEMU returns a QMP error: + + {'execute': 'blockdev-create', 'arguments': {'job-id': 'x', 'options': {'size': 0, 'driver': 'nfs', 'location': {'path': '/', 'server': {'host': '::1', 'type': 'inet'}}}}, 'id': 'x'} + {"id": "x", "error": {"class": "GenericError", "desc": "Block driver 'nfs' not found or not supported"}} + +Cc: qemu-stable@nongnu.org +Reported-by: Xu Tian +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Eric Blake +Reviewed-by: John Snow +Signed-off-by: Kevin Wolf +(cherry picked from commit d90d5cae2b10efc0e8d0b3cc91ff16201853d3ba) +Signed-off-by: Michael Roth +--- + block/create.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/block/create.c b/block/create.c +index 95341219ef..de5e97bb18 100644 +--- a/block/create.c ++++ b/block/create.c +@@ -63,9 +63,13 @@ void qmp_blockdev_create(const char *job_id, BlockdevCreateOptions *options, + const char *fmt = BlockdevDriver_str(options->driver); + BlockDriver *drv = bdrv_find_format(fmt); + ++ if (!drv) { ++ error_setg(errp, "Block driver '%s' not found or not supported", fmt); ++ return; ++ } ++ + /* If the 
driver is in the schema, we know that it exists. But it may not + * be whitelisted. */ +- assert(drv); + if (bdrv_uses_whitelist() && !bdrv_is_whitelisted(drv, false)) { + error_setg(errp, "Driver is not whitelisted"); + return; +-- +2.23.0 diff --git a/block-file-posix-Let-post-EOF-fallocate-serialize.patch b/block-file-posix-Let-post-EOF-fallocate-serialize.patch new file mode 100644 index 0000000..bf7d34a --- /dev/null +++ b/block-file-posix-Let-post-EOF-fallocate-serialize.patch @@ -0,0 +1,69 @@ +From 7db05c8a732fbdc986a40aadf0de6dd23057d044 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Fri, 1 Nov 2019 16:25:10 +0100 +Subject: [PATCH] block/file-posix: Let post-EOF fallocate serialize + +The XFS kernel driver has a bug that may cause data corruption for qcow2 +images as of qemu commit c8bb23cbdbe32f. We can work around it by +treating post-EOF fallocates as serializing up until infinity (INT64_MAX +in practice). + +Cc: qemu-stable@nongnu.org +Signed-off-by: Max Reitz +Message-id: 20191101152510.11719-4-mreitz@redhat.com +Signed-off-by: Max Reitz +(cherry picked from commit 292d06b925b2787ee6f2430996b95651cae42fce) +Signed-off-by: Michael Roth +--- + block/file-posix.c | 36 ++++++++++++++++++++++++++++++++++++ + 1 file changed, 36 insertions(+) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 992eb4a798..c5df61b477 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -2623,6 +2623,42 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, + RawPosixAIOData acb; + ThreadPoolFunc *handler; + ++#ifdef CONFIG_FALLOCATE ++ if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) { ++ BdrvTrackedRequest *req; ++ uint64_t end; ++ ++ /* ++ * This is a workaround for a bug in the Linux XFS driver, ++ * where writes submitted through the AIO interface will be ++ * discarded if they happen beyond a concurrently running ++ * fallocate() that increases the file length (i.e., both the ++ * write and the fallocate() happen beyond 
the EOF). ++ * ++ * To work around it, we extend the tracked request for this ++ * zero write until INT64_MAX (effectively infinity), and mark ++ * it as serializing. ++ * ++ * We have to enable this workaround for all filesystems and ++ * AIO modes (not just XFS with aio=native), because for ++ * remote filesystems we do not know the host configuration. ++ */ ++ ++ req = bdrv_co_get_self_request(bs); ++ assert(req); ++ assert(req->type == BDRV_TRACKED_WRITE); ++ assert(req->offset <= offset); ++ assert(req->offset + req->bytes >= offset + bytes); ++ ++ end = INT64_MAX & -(uint64_t)bs->bl.request_alignment; ++ req->bytes = end - req->offset; ++ req->overlap_bytes = req->bytes; ++ ++ bdrv_mark_request_serialising(req, bs->bl.request_alignment); ++ bdrv_wait_serialising_requests(req); ++ } ++#endif ++ + acb = (RawPosixAIOData) { + .bs = bs, + .aio_fildes = s->fd, +-- +2.23.0 diff --git a/block-file-posix-Reduce-xfsctl-use.patch b/block-file-posix-Reduce-xfsctl-use.patch new file mode 100644 index 0000000..69ceb45 --- /dev/null +++ b/block-file-posix-Reduce-xfsctl-use.patch @@ -0,0 +1,165 @@ +From 6f1a94035b02d3676a897ea5fa4cda4c62128228 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Fri, 23 Aug 2019 15:03:40 +0200 +Subject: [PATCH] block/file-posix: Reduce xfsctl() use +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This patch removes xfs_write_zeroes() and xfs_discard(). Both functions +have been added just before the same feature was present through +fallocate(): + +- fallocate() has supported PUNCH_HOLE for XFS since Linux 2.6.38 (March + 2011); xfs_discard() was added in December 2010. + +- fallocate() has supported ZERO_RANGE for XFS since Linux 3.15 (June + 2014); xfs_write_zeroes() was added in November 2013. + +Nowadays, all systems that qemu runs on should support both fallocate() +features (RHEL 7's kernel does). 
+ +xfsctl() is still useful for getting the request alignment for O_DIRECT, +so this patch does not remove our dependency on it completely. + +Note that xfs_write_zeroes() had a bug: It calls ftruncate() when the +file is shorter than the specified range (because ZERO_RANGE does not +increase the file length). ftruncate() may yield and then discard data +that parallel write requests have written past the EOF in the meantime. +Dropping the function altogether fixes the bug. + +Suggested-by: Paolo Bonzini +Fixes: 50ba5b2d994853b38fed10e0841b119da0f8b8e5 +Reported-by: Lukáš Doktor +Cc: qemu-stable@nongnu.org +Signed-off-by: Max Reitz +Reviewed-by: Stefano Garzarella +Reviewed-by: John Snow +Tested-by: Stefano Garzarella +Tested-by: John Snow +Signed-off-by: Kevin Wolf +(cherry picked from commit b2c6f23f4a9f6d8f1b648705cd46d3713b78d6a2) +Signed-off-by: Michael Roth +--- + block/file-posix.c | 77 +--------------------------------------------- + 1 file changed, 1 insertion(+), 76 deletions(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 4479cc7ab4..992eb4a798 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -1445,59 +1445,6 @@ out: + } + } + +-#ifdef CONFIG_XFS +-static int xfs_write_zeroes(BDRVRawState *s, int64_t offset, uint64_t bytes) +-{ +- int64_t len; +- struct xfs_flock64 fl; +- int err; +- +- len = lseek(s->fd, 0, SEEK_END); +- if (len < 0) { +- return -errno; +- } +- +- if (offset + bytes > len) { +- /* XFS_IOC_ZERO_RANGE does not increase the file length */ +- if (ftruncate(s->fd, offset + bytes) < 0) { +- return -errno; +- } +- } +- +- memset(&fl, 0, sizeof(fl)); +- fl.l_whence = SEEK_SET; +- fl.l_start = offset; +- fl.l_len = bytes; +- +- if (xfsctl(NULL, s->fd, XFS_IOC_ZERO_RANGE, &fl) < 0) { +- err = errno; +- trace_file_xfs_write_zeroes(strerror(errno)); +- return -err; +- } +- +- return 0; +-} +- +-static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes) +-{ +- struct xfs_flock64 fl; +- int err; +- +- 
memset(&fl, 0, sizeof(fl)); +- fl.l_whence = SEEK_SET; +- fl.l_start = offset; +- fl.l_len = bytes; +- +- if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) { +- err = errno; +- trace_file_xfs_discard(strerror(errno)); +- return -err; +- } +- +- return 0; +-} +-#endif +- + static int translate_err(int err) + { + if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP || +@@ -1553,10 +1500,8 @@ static ssize_t handle_aiocb_write_zeroes_block(RawPosixAIOData *aiocb) + static int handle_aiocb_write_zeroes(void *opaque) + { + RawPosixAIOData *aiocb = opaque; +-#if defined(CONFIG_FALLOCATE) || defined(CONFIG_XFS) +- BDRVRawState *s = aiocb->bs->opaque; +-#endif + #ifdef CONFIG_FALLOCATE ++ BDRVRawState *s = aiocb->bs->opaque; + int64_t len; + #endif + +@@ -1564,12 +1509,6 @@ static int handle_aiocb_write_zeroes(void *opaque) + return handle_aiocb_write_zeroes_block(aiocb); + } + +-#ifdef CONFIG_XFS +- if (s->is_xfs) { +- return xfs_write_zeroes(s, aiocb->aio_offset, aiocb->aio_nbytes); +- } +-#endif +- + #ifdef CONFIG_FALLOCATE_ZERO_RANGE + if (s->has_write_zeroes) { + int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE, +@@ -1632,14 +1571,6 @@ static int handle_aiocb_write_zeroes_unmap(void *opaque) + } + #endif + +-#ifdef CONFIG_XFS +- if (s->is_xfs) { +- /* xfs_discard() guarantees that the discarded area reads as all-zero +- * afterwards, so we can use it here. 
*/ +- return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes); +- } +-#endif +- + /* If we couldn't manage to unmap while guaranteed that the area reads as + * all-zero afterwards, just write zeroes without unmapping */ + ret = handle_aiocb_write_zeroes(aiocb); +@@ -1716,12 +1647,6 @@ static int handle_aiocb_discard(void *opaque) + ret = -errno; + #endif + } else { +-#ifdef CONFIG_XFS +- if (s->is_xfs) { +- return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes); +- } +-#endif +- + #ifdef CONFIG_FALLOCATE_PUNCH_HOLE + ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + aiocb->aio_offset, aiocb->aio_nbytes); +-- +2.23.0 diff --git a/block-io-refactor-padding.patch b/block-io-refactor-padding.patch new file mode 100644 index 0000000..7a26714 --- /dev/null +++ b/block-io-refactor-padding.patch @@ -0,0 +1,481 @@ +From 2e2ad02f2cecf419eaad0df982ceb5b41170cc7e Mon Sep 17 00:00:00 2001 +From: Vladimir Sementsov-Ogievskiy +Date: Tue, 4 Jun 2019 19:15:05 +0300 +Subject: [PATCH] block/io: refactor padding + +We have similar padding code in bdrv_co_pwritev, +bdrv_co_do_pwrite_zeroes and bdrv_co_preadv. Let's combine and unify +it. 
+ +[Squashed in Vladimir's qemu-iotests 077 fix +--Stefan] + +Signed-off-by: Vladimir Sementsov-Ogievskiy +Acked-by: Stefan Hajnoczi +Message-id: 20190604161514.262241-4-vsementsov@virtuozzo.com +Message-Id: <20190604161514.262241-4-vsementsov@virtuozzo.com> +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 7a3f542fbdfd799be4fa6f8b96dc8c1e6933fce4) +*prereq for 292d06b9 +Signed-off-by: Michael Roth +--- + block/io.c | 365 +++++++++++++++++++++++++++++------------------------ + 1 file changed, 200 insertions(+), 165 deletions(-) + +diff --git a/block/io.c b/block/io.c +index dccf687acc..07d2d825c3 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -1408,28 +1408,177 @@ out: + } + + /* +- * Handle a read request in coroutine context ++ * Request padding ++ * ++ * |<---- align ----->| |<----- align ---->| ++ * |<- head ->|<------------- bytes ------------->|<-- tail -->| ++ * | | | | | | ++ * -*----------$-------*-------- ... --------*-----$------------*--- ++ * | | | | | | ++ * | offset | | end | ++ * ALIGN_DOWN(offset) ALIGN_UP(offset) ALIGN_DOWN(end) ALIGN_UP(end) ++ * [buf ... ) [tail_buf ) ++ * ++ * @buf is an aligned allocation needed to store @head and @tail paddings. @head ++ * is placed at the beginning of @buf and @tail at the @end. ++ * ++ * @tail_buf is a pointer to sub-buffer, corresponding to align-sized chunk ++ * around tail, if tail exists. ++ * ++ * @merge_reads is true for small requests, ++ * if @buf_len == @head + bytes + @tail. In this case it is possible that both ++ * head and tail exist but @buf_len == align and @tail_buf == @buf. 
++ */ ++typedef struct BdrvRequestPadding { ++ uint8_t *buf; ++ size_t buf_len; ++ uint8_t *tail_buf; ++ size_t head; ++ size_t tail; ++ bool merge_reads; ++ QEMUIOVector local_qiov; ++} BdrvRequestPadding; ++ ++static bool bdrv_init_padding(BlockDriverState *bs, ++ int64_t offset, int64_t bytes, ++ BdrvRequestPadding *pad) ++{ ++ uint64_t align = bs->bl.request_alignment; ++ size_t sum; ++ ++ memset(pad, 0, sizeof(*pad)); ++ ++ pad->head = offset & (align - 1); ++ pad->tail = ((offset + bytes) & (align - 1)); ++ if (pad->tail) { ++ pad->tail = align - pad->tail; ++ } ++ ++ if ((!pad->head && !pad->tail) || !bytes) { ++ return false; ++ } ++ ++ sum = pad->head + bytes + pad->tail; ++ pad->buf_len = (sum > align && pad->head && pad->tail) ? 2 * align : align; ++ pad->buf = qemu_blockalign(bs, pad->buf_len); ++ pad->merge_reads = sum == pad->buf_len; ++ if (pad->tail) { ++ pad->tail_buf = pad->buf + pad->buf_len - align; ++ } ++ ++ return true; ++} ++ ++static int bdrv_padding_rmw_read(BdrvChild *child, ++ BdrvTrackedRequest *req, ++ BdrvRequestPadding *pad, ++ bool zero_middle) ++{ ++ QEMUIOVector local_qiov; ++ BlockDriverState *bs = child->bs; ++ uint64_t align = bs->bl.request_alignment; ++ int ret; ++ ++ assert(req->serialising && pad->buf); ++ ++ if (pad->head || pad->merge_reads) { ++ uint64_t bytes = pad->merge_reads ? 
pad->buf_len : align; ++ ++ qemu_iovec_init_buf(&local_qiov, pad->buf, bytes); ++ ++ if (pad->head) { ++ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); ++ } ++ if (pad->merge_reads && pad->tail) { ++ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); ++ } ++ ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes, ++ align, &local_qiov, 0); ++ if (ret < 0) { ++ return ret; ++ } ++ if (pad->head) { ++ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); ++ } ++ if (pad->merge_reads && pad->tail) { ++ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); ++ } ++ ++ if (pad->merge_reads) { ++ goto zero_mem; ++ } ++ } ++ ++ if (pad->tail) { ++ qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align); ++ ++ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); ++ ret = bdrv_aligned_preadv( ++ child, req, ++ req->overlap_offset + req->overlap_bytes - align, ++ align, align, &local_qiov, 0); ++ if (ret < 0) { ++ return ret; ++ } ++ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); ++ } ++ ++zero_mem: ++ if (zero_middle) { ++ memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail); ++ } ++ ++ return 0; ++} ++ ++static void bdrv_padding_destroy(BdrvRequestPadding *pad) ++{ ++ if (pad->buf) { ++ qemu_vfree(pad->buf); ++ qemu_iovec_destroy(&pad->local_qiov); ++ } ++} ++ ++/* ++ * bdrv_pad_request ++ * ++ * Exchange request parameters with padded request if needed. Don't include RMW ++ * read of padding, bdrv_padding_rmw_read() should be called separately if ++ * needed. ++ * ++ * All parameters except @bs are in-out: they represent original request at ++ * function call and padded (if padding needed) at function finish. ++ * ++ * Function always succeeds. 
+ */ ++static bool bdrv_pad_request(BlockDriverState *bs, QEMUIOVector **qiov, ++ int64_t *offset, unsigned int *bytes, ++ BdrvRequestPadding *pad) ++{ ++ if (!bdrv_init_padding(bs, *offset, *bytes, pad)) { ++ return false; ++ } ++ ++ qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head, ++ *qiov, 0, *bytes, ++ pad->buf + pad->buf_len - pad->tail, pad->tail); ++ *bytes += pad->head + pad->tail; ++ *offset -= pad->head; ++ *qiov = &pad->local_qiov; ++ ++ return true; ++} ++ + int coroutine_fn bdrv_co_preadv(BdrvChild *child, + int64_t offset, unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) + { + BlockDriverState *bs = child->bs; +- BlockDriver *drv = bs->drv; + BdrvTrackedRequest req; +- +- uint64_t align = bs->bl.request_alignment; +- uint8_t *head_buf = NULL; +- uint8_t *tail_buf = NULL; +- QEMUIOVector local_qiov; +- bool use_local_qiov = false; ++ BdrvRequestPadding pad; + int ret; + +- trace_bdrv_co_preadv(child->bs, offset, bytes, flags); +- +- if (!drv) { +- return -ENOMEDIUM; +- } ++ trace_bdrv_co_preadv(bs, offset, bytes, flags); + + ret = bdrv_check_byte_request(bs, offset, bytes); + if (ret < 0) { +@@ -1443,43 +1592,16 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, + flags |= BDRV_REQ_COPY_ON_READ; + } + +- /* Align read if necessary by padding qiov */ +- if (offset & (align - 1)) { +- head_buf = qemu_blockalign(bs, align); +- qemu_iovec_init(&local_qiov, qiov->niov + 2); +- qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); +- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); +- use_local_qiov = true; +- +- bytes += offset & (align - 1); +- offset = offset & ~(align - 1); +- } +- +- if ((offset + bytes) & (align - 1)) { +- if (!use_local_qiov) { +- qemu_iovec_init(&local_qiov, qiov->niov + 1); +- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); +- use_local_qiov = true; +- } +- tail_buf = qemu_blockalign(bs, align); +- qemu_iovec_add(&local_qiov, tail_buf, +- align - ((offset + bytes) & (align - 1))); +- 
+- bytes = ROUND_UP(bytes, align); +- } ++ bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad); + + tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ); +- ret = bdrv_aligned_preadv(child, &req, offset, bytes, align, +- use_local_qiov ? &local_qiov : qiov, +- flags); ++ ret = bdrv_aligned_preadv(child, &req, offset, bytes, ++ bs->bl.request_alignment, ++ qiov, flags); + tracked_request_end(&req); + bdrv_dec_in_flight(bs); + +- if (use_local_qiov) { +- qemu_iovec_destroy(&local_qiov); +- qemu_vfree(head_buf); +- qemu_vfree(tail_buf); +- } ++ bdrv_padding_destroy(&pad); + + return ret; + } +@@ -1775,44 +1897,34 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, + BdrvTrackedRequest *req) + { + BlockDriverState *bs = child->bs; +- uint8_t *buf = NULL; + QEMUIOVector local_qiov; + uint64_t align = bs->bl.request_alignment; +- unsigned int head_padding_bytes, tail_padding_bytes; + int ret = 0; ++ bool padding; ++ BdrvRequestPadding pad; + +- head_padding_bytes = offset & (align - 1); +- tail_padding_bytes = (align - (offset + bytes)) & (align - 1); +- +- +- assert(flags & BDRV_REQ_ZERO_WRITE); +- if (head_padding_bytes || tail_padding_bytes) { +- buf = qemu_blockalign(bs, align); +- qemu_iovec_init_buf(&local_qiov, buf, align); +- } +- if (head_padding_bytes) { +- uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes); +- +- /* RMW the unaligned part before head. 
*/ ++ padding = bdrv_init_padding(bs, offset, bytes, &pad); ++ if (padding) { + mark_request_serialising(req, align); + wait_serialising_requests(req); +- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); +- ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align, +- align, &local_qiov, 0); +- if (ret < 0) { +- goto fail; +- } +- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); + +- memset(buf + head_padding_bytes, 0, zero_bytes); +- ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align, +- align, &local_qiov, +- flags & ~BDRV_REQ_ZERO_WRITE); +- if (ret < 0) { +- goto fail; ++ bdrv_padding_rmw_read(child, req, &pad, true); ++ ++ if (pad.head || pad.merge_reads) { ++ int64_t aligned_offset = offset & ~(align - 1); ++ int64_t write_bytes = pad.merge_reads ? pad.buf_len : align; ++ ++ qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes); ++ ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes, ++ align, &local_qiov, ++ flags & ~BDRV_REQ_ZERO_WRITE); ++ if (ret < 0 || pad.merge_reads) { ++ /* Error or all work is done */ ++ goto out; ++ } ++ offset += write_bytes - pad.head; ++ bytes -= write_bytes - pad.head; + } +- offset += zero_bytes; +- bytes -= zero_bytes; + } + + assert(!bytes || (offset & (align - 1)) == 0); +@@ -1822,7 +1934,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, + ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align, + NULL, flags); + if (ret < 0) { +- goto fail; ++ goto out; + } + bytes -= aligned_bytes; + offset += aligned_bytes; +@@ -1830,26 +1942,17 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, + + assert(!bytes || (offset & (align - 1)) == 0); + if (bytes) { +- assert(align == tail_padding_bytes + bytes); +- /* RMW the unaligned part after tail. 
*/ +- mark_request_serialising(req, align); +- wait_serialising_requests(req); +- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); +- ret = bdrv_aligned_preadv(child, req, offset, align, +- align, &local_qiov, 0); +- if (ret < 0) { +- goto fail; +- } +- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); ++ assert(align == pad.tail + bytes); + +- memset(buf, 0, bytes); ++ qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align); + ret = bdrv_aligned_pwritev(child, req, offset, align, align, + &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE); + } +-fail: +- qemu_vfree(buf); +- return ret; + ++out: ++ bdrv_padding_destroy(&pad); ++ ++ return ret; + } + + /* +@@ -1862,10 +1965,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, + BlockDriverState *bs = child->bs; + BdrvTrackedRequest req; + uint64_t align = bs->bl.request_alignment; +- uint8_t *head_buf = NULL; +- uint8_t *tail_buf = NULL; +- QEMUIOVector local_qiov; +- bool use_local_qiov = false; ++ BdrvRequestPadding pad; + int ret; + + trace_bdrv_co_pwritev(child->bs, offset, bytes, flags); +@@ -1892,86 +1992,21 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, + goto out; + } + +- if (offset & (align - 1)) { +- QEMUIOVector head_qiov; +- ++ if (bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad)) { + mark_request_serialising(&req, align); + wait_serialising_requests(&req); +- +- head_buf = qemu_blockalign(bs, align); +- qemu_iovec_init_buf(&head_qiov, head_buf, align); +- +- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); +- ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align, +- align, &head_qiov, 0); +- if (ret < 0) { +- goto fail; +- } +- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); +- +- qemu_iovec_init(&local_qiov, qiov->niov + 2); +- qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); +- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); +- use_local_qiov = true; +- +- bytes += offset & (align - 1); +- offset = offset & ~(align - 1); +- +- /* We have read the tail already if 
the request is smaller +- * than one aligned block. +- */ +- if (bytes < align) { +- qemu_iovec_add(&local_qiov, head_buf + bytes, align - bytes); +- bytes = align; +- } +- } +- +- if ((offset + bytes) & (align - 1)) { +- QEMUIOVector tail_qiov; +- size_t tail_bytes; +- bool waited; +- +- mark_request_serialising(&req, align); +- waited = wait_serialising_requests(&req); +- assert(!waited || !use_local_qiov); +- +- tail_buf = qemu_blockalign(bs, align); +- qemu_iovec_init_buf(&tail_qiov, tail_buf, align); +- +- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); +- ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1), +- align, align, &tail_qiov, 0); +- if (ret < 0) { +- goto fail; +- } +- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); +- +- if (!use_local_qiov) { +- qemu_iovec_init(&local_qiov, qiov->niov + 1); +- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); +- use_local_qiov = true; +- } +- +- tail_bytes = (offset + bytes) & (align - 1); +- qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes); +- +- bytes = ROUND_UP(bytes, align); ++ bdrv_padding_rmw_read(child, &req, &pad, false); + } + + ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align, +- use_local_qiov ? 
&local_qiov : qiov, +- flags); ++ qiov, flags); + +-fail: ++ bdrv_padding_destroy(&pad); + +- if (use_local_qiov) { +- qemu_iovec_destroy(&local_qiov); +- } +- qemu_vfree(head_buf); +- qemu_vfree(tail_buf); + out: + tracked_request_end(&req); + bdrv_dec_in_flight(bs); ++ + return ret; + } + +-- +2.23.0 diff --git a/block-nfs-tear-down-aio-before-nfs_close.patch b/block-nfs-tear-down-aio-before-nfs_close.patch new file mode 100644 index 0000000..ea116d0 --- /dev/null +++ b/block-nfs-tear-down-aio-before-nfs_close.patch @@ -0,0 +1,41 @@ +From 0694c489cd240620fee5675e8d24c7ce02d1d67d Mon Sep 17 00:00:00 2001 +From: Peter Lieven +Date: Tue, 10 Sep 2019 17:41:09 +0200 +Subject: [PATCH] block/nfs: tear down aio before nfs_close + +nfs_close is a sync call from libnfs and has its own event +handler polling on the nfs FD. Avoid that both QEMU and libnfs +are interfering here. + +CC: qemu-stable@nongnu.org +Signed-off-by: Peter Lieven +Signed-off-by: Kevin Wolf +(cherry picked from commit 601dc6559725f7a614b6f893611e17ff0908e914) +Signed-off-by: Michael Roth +--- + block/nfs.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/block/nfs.c b/block/nfs.c +index d93241b3bb..2b7a078241 100644 +--- a/block/nfs.c ++++ b/block/nfs.c +@@ -390,12 +390,14 @@ static void nfs_attach_aio_context(BlockDriverState *bs, + static void nfs_client_close(NFSClient *client) + { + if (client->context) { ++ qemu_mutex_lock(&client->mutex); ++ aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context), ++ false, NULL, NULL, NULL, NULL); ++ qemu_mutex_unlock(&client->mutex); + if (client->fh) { + nfs_close(client->context, client->fh); + client->fh = NULL; + } +- aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context), +- false, NULL, NULL, NULL, NULL); + nfs_destroy_context(client->context); + client->context = NULL; + } +-- +2.23.0 diff --git a/block-posix-Always-allocate-the-first-block.patch b/block-posix-Always-allocate-the-first-block.patch new 
file mode 100644 index 0000000..166d739 --- /dev/null +++ b/block-posix-Always-allocate-the-first-block.patch @@ -0,0 +1,343 @@ +From 3d018ff3bdd8aec260254036b600cfa8d694ced4 Mon Sep 17 00:00:00 2001 +From: Nir Soffer +Date: Tue, 27 Aug 2019 04:05:27 +0300 +Subject: [PATCH] block: posix: Always allocate the first block + +When creating an image with preallocation "off" or "falloc", the first +block of the image is typically not allocated. When using Gluster +storage backed by XFS filesystem, reading this block using direct I/O +succeeds regardless of request length, fooling alignment detection. + +In this case we fallback to a safe value (4096) instead of the optimal +value (512), which may lead to unneeded data copying when aligning +requests. Allocating the first block avoids the fallback. + +Since we allocate the first block even with preallocation=off, we no +longer create images with zero disk size: + + $ ./qemu-img create -f raw test.raw 1g + Formatting 'test.raw', fmt=raw size=1073741824 + + $ ls -lhs test.raw + 4.0K -rw-r--r--. 1 nsoffer nsoffer 1.0G Aug 16 23:48 test.raw + +And converting the image requires additional cluster: + + $ ./qemu-img measure -f raw -O qcow2 test.raw + required size: 458752 + fully allocated size: 1074135040 + +When using format like vmdk with multiple files per image, we allocate +one block per file: + + $ ./qemu-img create -f vmdk -o subformat=twoGbMaxExtentFlat test.vmdk 4g + Formatting 'test.vmdk', fmt=vmdk size=4294967296 compat6=off hwversion=undefined subformat=twoGbMaxExtentFlat + + $ ls -lhs test*.vmdk + 4.0K -rw-r--r--. 1 nsoffer nsoffer 2.0G Aug 27 03:23 test-f001.vmdk + 4.0K -rw-r--r--. 1 nsoffer nsoffer 2.0G Aug 27 03:23 test-f002.vmdk + 4.0K -rw-r--r--. 
1 nsoffer nsoffer 353 Aug 27 03:23 test.vmdk + +I did quick performance test for copying disks with qemu-img convert to +new raw target image to Gluster storage with sector size of 512 bytes: + + for i in $(seq 10); do + rm -f dst.raw + sleep 10 + time ./qemu-img convert -f raw -O raw -t none -T none src.raw dst.raw + done + +Here is a table comparing the total time spent: + +Type Before(s) After(s) Diff(%) +--------------------------------------- +real 530.028 469.123 -11.4 +user 17.204 10.768 -37.4 +sys 17.881 7.011 -60.7 + +We can see very clear improvement in CPU usage. + +Signed-off-by: Nir Soffer +Message-id: 20190827010528.8818-2-nsoffer@redhat.com +Reviewed-by: Max Reitz +Signed-off-by: Max Reitz + +(cherry picked from commit 3a20013fbb26d2a1bd11ef148eefdb1508783787) + +Signed-off-by: Michael Roth +--- + block/file-posix.c | 51 +++++++++++++++++++ + tests/qemu-iotests/059.out | 2 +- + tests/qemu-iotests/{150.out => 150.out.qcow2} | 0 + tests/qemu-iotests/150.out.raw | 12 +++++ + tests/qemu-iotests/175 | 19 ++++--- + tests/qemu-iotests/175.out | 8 +-- + tests/qemu-iotests/178.out.qcow2 | 4 +- + tests/qemu-iotests/221.out | 12 +++-- + tests/qemu-iotests/253.out | 12 +++-- + 9 files changed, 99 insertions(+), 21 deletions(-) + rename tests/qemu-iotests/{150.out => 150.out.qcow2} (100%) + create mode 100644 tests/qemu-iotests/150.out.raw + +diff --git a/block/file-posix.c b/block/file-posix.c +index be32dd8c51..2184aa980c 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -1674,6 +1674,43 @@ static int handle_aiocb_discard(void *opaque) + return ret; + } + ++/* ++ * Help alignment probing by allocating the first block. ++ * ++ * When reading with direct I/O from unallocated area on Gluster backed by XFS, ++ * reading succeeds regardless of request length. In this case we fallback to ++ * safe alignment which is not optimal. Allocating the first block avoids this ++ * fallback. 
++ * ++ * fd may be opened with O_DIRECT, but we don't know the buffer alignment or ++ * request alignment, so we use safe values. ++ * ++ * Returns: 0 on success, -errno on failure. Since this is an optimization, ++ * caller may ignore failures. ++ */ ++static int allocate_first_block(int fd, size_t max_size) ++{ ++ size_t write_size = (max_size < MAX_BLOCKSIZE) ++ ? BDRV_SECTOR_SIZE ++ : MAX_BLOCKSIZE; ++ size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize()); ++ void *buf; ++ ssize_t n; ++ int ret; ++ ++ buf = qemu_memalign(max_align, write_size); ++ memset(buf, 0, write_size); ++ ++ do { ++ n = pwrite(fd, buf, write_size, 0); ++ } while (n == -1 && errno == EINTR); ++ ++ ret = (n == -1) ? -errno : 0; ++ ++ qemu_vfree(buf); ++ return ret; ++} ++ + static int handle_aiocb_truncate(void *opaque) + { + RawPosixAIOData *aiocb = opaque; +@@ -1713,6 +1750,17 @@ static int handle_aiocb_truncate(void *opaque) + /* posix_fallocate() doesn't set errno. */ + error_setg_errno(errp, -result, + "Could not preallocate new data"); ++ } else if (current_length == 0) { ++ /* ++ * posix_fallocate() uses fallocate() if the filesystem ++ * supports it, or fallback to manually writing zeroes. If ++ * fallocate() was used, unaligned reads from the fallocated ++ * area in raw_probe_alignment() will succeed, hence we need to ++ * allocate the first block. ++ * ++ * Optimize future alignment probing; ignore failures. ++ */ ++ allocate_first_block(fd, offset); + } + } else { + result = 0; +@@ -1774,6 +1822,9 @@ static int handle_aiocb_truncate(void *opaque) + if (ftruncate(fd, offset) != 0) { + result = -errno; + error_setg_errno(errp, -result, "Could not resize file"); ++ } else if (current_length == 0 && offset > current_length) { ++ /* Optimize future alignment probing; ignore failures. 
*/ ++ allocate_first_block(fd, offset); + } + return result; + default: +diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out +index 4fab42a28c..fe3f861f3c 100644 +--- a/tests/qemu-iotests/059.out ++++ b/tests/qemu-iotests/059.out +@@ -27,7 +27,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824000 subformat=twoGbMax + image: TEST_DIR/t.vmdk + file format: vmdk + virtual size: 0.977 TiB (1073741824000 bytes) +-disk size: 16 KiB ++disk size: 1.97 MiB + Format specific information: + cid: XXXXXXXX + parent cid: XXXXXXXX +diff --git a/tests/qemu-iotests/150.out b/tests/qemu-iotests/150.out.qcow2 +similarity index 100% +rename from tests/qemu-iotests/150.out +rename to tests/qemu-iotests/150.out.qcow2 +diff --git a/tests/qemu-iotests/150.out.raw b/tests/qemu-iotests/150.out.raw +new file mode 100644 +index 0000000000..3cdc7727a5 +--- /dev/null ++++ b/tests/qemu-iotests/150.out.raw +@@ -0,0 +1,12 @@ ++QA output created by 150 ++ ++=== Mapping sparse conversion === ++ ++Offset Length File ++0 0x1000 TEST_DIR/t.IMGFMT ++ ++=== Mapping non-sparse conversion === ++ ++Offset Length File ++0 0x100000 TEST_DIR/t.IMGFMT ++*** done +diff --git a/tests/qemu-iotests/175 b/tests/qemu-iotests/175 +index 51e62c8276..7ba28b3c1b 100755 +--- a/tests/qemu-iotests/175 ++++ b/tests/qemu-iotests/175 +@@ -37,14 +37,16 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 + # the file size. This function hides the resulting difference in the + # stat -c '%b' output. 
+ # Parameter 1: Number of blocks an empty file occupies +-# Parameter 2: Image size in bytes ++# Parameter 2: Minimal number of blocks in an image ++# Parameter 3: Image size in bytes + _filter_blocks() + { + extra_blocks=$1 +- img_size=$2 ++ min_blocks=$2 ++ img_size=$3 + +- sed -e "s/blocks=$extra_blocks\\(\$\\|[^0-9]\\)/nothing allocated/" \ +- -e "s/blocks=$((extra_blocks + img_size / 512))\\(\$\\|[^0-9]\\)/everything allocated/" ++ sed -e "s/blocks=$min_blocks\\(\$\\|[^0-9]\\)/min allocation/" \ ++ -e "s/blocks=$((extra_blocks + img_size / 512))\\(\$\\|[^0-9]\\)/max allocation/" + } + + # get standard environment, filters and checks +@@ -60,16 +62,21 @@ size=$((1 * 1024 * 1024)) + touch "$TEST_DIR/empty" + extra_blocks=$(stat -c '%b' "$TEST_DIR/empty") + ++# We always write the first byte; check how many blocks this filesystem ++# allocates to match empty image allocation. ++printf "\0" > "$TEST_DIR/empty" ++min_blocks=$(stat -c '%b' "$TEST_DIR/empty") ++ + echo + echo "== creating image with default preallocation ==" + _make_test_img $size | _filter_imgfmt +-stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $size ++stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $min_blocks $size + + for mode in off full falloc; do + echo + echo "== creating image with preallocation $mode ==" + IMGOPTS=preallocation=$mode _make_test_img $size | _filter_imgfmt +- stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $size ++ stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $min_blocks $size + done + + # success, all done +diff --git a/tests/qemu-iotests/175.out b/tests/qemu-iotests/175.out +index 6d9a5ed84e..263e521262 100644 +--- a/tests/qemu-iotests/175.out ++++ b/tests/qemu-iotests/175.out +@@ -2,17 +2,17 @@ QA output created by 175 + + == creating image with default preallocation == + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +-size=1048576, nothing allocated ++size=1048576, min 
allocation + + == creating image with preallocation off == + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 preallocation=off +-size=1048576, nothing allocated ++size=1048576, min allocation + + == creating image with preallocation full == + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 preallocation=full +-size=1048576, everything allocated ++size=1048576, max allocation + + == creating image with preallocation falloc == + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 preallocation=falloc +-size=1048576, everything allocated ++size=1048576, max allocation + *** done +diff --git a/tests/qemu-iotests/178.out.qcow2 b/tests/qemu-iotests/178.out.qcow2 +index 55a8dc926f..9e7d8c44df 100644 +--- a/tests/qemu-iotests/178.out.qcow2 ++++ b/tests/qemu-iotests/178.out.qcow2 +@@ -101,7 +101,7 @@ converted image file size in bytes: 196608 + == raw input image with data (human) == + + Formatting 'TEST_DIR/t.qcow2', fmt=IMGFMT size=1073741824 +-required size: 393216 ++required size: 458752 + fully allocated size: 1074135040 + wrote 512/512 bytes at offset 512 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +@@ -257,7 +257,7 @@ converted image file size in bytes: 196608 + + Formatting 'TEST_DIR/t.qcow2', fmt=IMGFMT size=1073741824 + { +- "required": 393216, ++ "required": 458752, + "fully-allocated": 1074135040 + } + wrote 512/512 bytes at offset 512 +diff --git a/tests/qemu-iotests/221.out b/tests/qemu-iotests/221.out +index 9f9dd52bb0..dca024a0c3 100644 +--- a/tests/qemu-iotests/221.out ++++ b/tests/qemu-iotests/221.out +@@ -3,14 +3,18 @@ QA output created by 221 + === Check mapping of unaligned raw image === + + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=65537 +-[{ "start": 0, "length": 66048, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] +-[{ "start": 0, "length": 66048, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 
OFFSET}, ++{ "start": 4096, "length": 61952, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 4096, "length": 61952, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] + wrote 1/1 bytes at offset 65536 + 1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +-[{ "start": 0, "length": 65536, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 4096, "length": 61440, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, + { "start": 65536, "length": 1, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, + { "start": 65537, "length": 511, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] +-[{ "start": 0, "length": 65536, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 4096, "length": 61440, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, + { "start": 65536, "length": 1, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, + { "start": 65537, "length": 511, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] + *** done +diff --git a/tests/qemu-iotests/253.out b/tests/qemu-iotests/253.out +index 607c0baa0b..3d08b305d7 100644 +--- a/tests/qemu-iotests/253.out ++++ b/tests/qemu-iotests/253.out +@@ -3,12 +3,16 @@ QA output created by 253 + === Check mapping of unaligned raw image === + + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048575 +-[{ "start": 0, "length": 1048576, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] +-[{ "start": 0, "length": 1048576, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 4096, "length": 1044480, "depth": 
0, "zero": true, "data": false, "offset": OFFSET}] ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 4096, "length": 1044480, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] + wrote 65535/65535 bytes at offset 983040 + 63.999 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +-[{ "start": 0, "length": 983040, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 4096, "length": 978944, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, + { "start": 983040, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] +-[{ "start": 0, "length": 983040, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 4096, "length": 978944, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, + { "start": 983040, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] + *** done +-- +2.23.0 diff --git a/block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch b/block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch new file mode 100644 index 0000000..f77cc06 --- /dev/null +++ b/block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch @@ -0,0 +1,66 @@ +From 84f22c728520792f1010074e0d5ac2ec8e2e372c Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Sun, 15 Sep 2019 23:36:53 +0300 +Subject: [PATCH] block/qcow2: Fix corruption introduced by commit 8ac0f15f335 + +This fixes subtle corruption introduced by luks threaded encryption +in commit 8ac0f15f335 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1745922 + +The corruption happens when we do a write that + * writes to two or more unallocated clusters at once + * doesn't fully cover the first sector + * doesn't fully cover the last sector + * uses luks encryption + +In 
this case, when allocating the new clusters we COW both areas +prior to the write and after the write, and we encrypt them. + +The above mentioned commit accidentally made it so we encrypt the +second COW area using the physical cluster offset of the first area. + +The problem is that offset_in_cluster in do_perform_cow_encrypt +can be larger than the cluster size, thus cluster_offset +will no longer point to the start of the cluster at which encrypted +area starts. + +Next patch in this series will refactor the code to avoid all these +assumptions. + +In the bugreport that was triggered by rebasing a luks image to a new, +zero filled base, which does a lot of such writes, and causes some files +with zero areas to contain garbage there instead. +But as described above it can happen elsewhere as well. + +Signed-off-by: Maxim Levitsky +Reviewed-by: Vladimir Sementsov-Ogievskiy +Message-id: 20190915203655.21638-2-mlevitsk@redhat.com +Reviewed-by: Max Reitz +Signed-off-by: Max Reitz +(cherry picked from commit 38e7d54bdc518b5a05a922467304bcace2396945) +Signed-off-by: Michael Roth +--- + block/qcow2-cluster.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c +index cc5609e27a..760564c8fb 100644 +--- a/block/qcow2-cluster.c ++++ b/block/qcow2-cluster.c +@@ -473,9 +473,10 @@ static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs, + assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0); + assert((bytes & ~BDRV_SECTOR_MASK) == 0); + assert(s->crypto); +- if (qcow2_co_encrypt(bs, cluster_offset, +- src_cluster_offset + offset_in_cluster, +- buffer, bytes) < 0) { ++ if (qcow2_co_encrypt(bs, ++ start_of_cluster(s, cluster_offset + offset_in_cluster), ++ src_cluster_offset + offset_in_cluster, ++ buffer, bytes) < 0) { + return false; + } + } +-- +2.23.0 diff --git a/block-snapshot-Restrict-set-of-snapshot-nodes.patch b/block-snapshot-Restrict-set-of-snapshot-nodes.patch new file mode 100644 index 
0000000..c29f30a --- /dev/null +++ b/block-snapshot-Restrict-set-of-snapshot-nodes.patch @@ -0,0 +1,124 @@ +From 7a8aa6c734bb1c2927ad0cc1d10bcacb53cf4ae3 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 17 Sep 2019 12:26:23 +0200 +Subject: [PATCH] block/snapshot: Restrict set of snapshot nodes + +Nodes involved in internal snapshots were those that were returned by +bdrv_next(), inserted and not read-only. bdrv_next() in turn returns all +nodes that are either the root node of a BlockBackend or monitor-owned +nodes. + +With the typical -drive use, this worked well enough. However, in the +typical -blockdev case, the user defines one node per option, making all +nodes monitor-owned nodes. This includes protocol nodes etc. which often +are not snapshottable, so "savevm" only returns an error. + +Change the conditions so that internal snapshots still include all nodes +that have a BlockBackend attached (we definitely want to snapshot +anything attached to a guest device and probably also the built-in NBD +server; snapshotting block job BlockBackends is more of an accident, but +a preexisting one), but other monitor-owned nodes are only included if +they have no parents. + +This makes internal snapshots usable again with typical -blockdev +configurations. 
+ +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Peter Krempa +Tested-by: Peter Krempa +(cherry picked from commit 05f4aced658a02b02d3e89a6c7a2281008fcf26c) +Signed-off-by: Michael Roth +--- + block/snapshot.c | 26 +++++++++++++++++++------- + 1 file changed, 19 insertions(+), 7 deletions(-) + +diff --git a/block/snapshot.c b/block/snapshot.c +index f2f48f926a..8081616ae9 100644 +--- a/block/snapshot.c ++++ b/block/snapshot.c +@@ -31,6 +31,7 @@ + #include "qapi/qmp/qerror.h" + #include "qapi/qmp/qstring.h" + #include "qemu/option.h" ++#include "sysemu/block-backend.h" + + QemuOptsList internal_snapshot_opts = { + .name = "snapshot", +@@ -384,6 +385,16 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, + return ret; + } + ++static bool bdrv_all_snapshots_includes_bs(BlockDriverState *bs) ++{ ++ if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { ++ return false; ++ } ++ ++ /* Include all nodes that are either in use by a BlockBackend, or that ++ * aren't attached to any node, but owned by the monitor. */ ++ return bdrv_has_blk(bs) || QLIST_EMPTY(&bs->parents); ++} + + /* Group operations. All block drivers are involved. 
+ * These functions will properly handle dataplane (take aio_context_acquire +@@ -399,7 +410,7 @@ bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs) + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); +- if (bdrv_is_inserted(bs) && !bdrv_is_read_only(bs)) { ++ if (bdrv_all_snapshots_includes_bs(bs)) { + ok = bdrv_can_snapshot(bs); + } + aio_context_release(ctx); +@@ -426,8 +437,9 @@ int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs, + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); +- if (bdrv_can_snapshot(bs) && +- bdrv_snapshot_find(bs, snapshot, name) >= 0) { ++ if (bdrv_all_snapshots_includes_bs(bs) && ++ bdrv_snapshot_find(bs, snapshot, name) >= 0) ++ { + ret = bdrv_snapshot_delete(bs, snapshot->id_str, + snapshot->name, err); + } +@@ -455,7 +467,7 @@ int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs, + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); +- if (bdrv_can_snapshot(bs)) { ++ if (bdrv_all_snapshots_includes_bs(bs)) { + ret = bdrv_snapshot_goto(bs, name, errp); + } + aio_context_release(ctx); +@@ -481,7 +493,7 @@ int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs) + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); +- if (bdrv_can_snapshot(bs)) { ++ if (bdrv_all_snapshots_includes_bs(bs)) { + err = bdrv_snapshot_find(bs, &sn, name); + } + aio_context_release(ctx); +@@ -512,7 +524,7 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, + if (bs == vm_state_bs) { + sn->vm_state_size = vm_state_size; + err = bdrv_snapshot_create(bs, sn); +- } else if (bdrv_can_snapshot(bs)) { ++ } else if (bdrv_all_snapshots_includes_bs(bs)) { + sn->vm_state_size = 0; + err = bdrv_snapshot_create(bs, sn); + } +@@ -538,7 +550,7 @@ BlockDriverState *bdrv_all_find_vmstate_bs(void) + bool found; + + aio_context_acquire(ctx); +- found = bdrv_can_snapshot(bs); ++ found = 
bdrv_all_snapshots_includes_bs(bs) && bdrv_can_snapshot(bs); + aio_context_release(ctx); + + if (found) { +-- +2.23.0 diff --git a/blockjob-update-nodes-head-while-removing-all-bdrv.patch b/blockjob-update-nodes-head-while-removing-all-bdrv.patch new file mode 100644 index 0000000..36cedc7 --- /dev/null +++ b/blockjob-update-nodes-head-while-removing-all-bdrv.patch @@ -0,0 +1,61 @@ +From 86b0f4022bb43b16979ba5300e8d40a1e6d44b79 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez +Date: Wed, 11 Sep 2019 12:03:16 +0200 +Subject: [PATCH] blockjob: update nodes head while removing all bdrv + +block_job_remove_all_bdrv() iterates through job->nodes, calling +bdrv_root_unref_child() for each entry. The call to the latter may +reach child_job_[can_]set_aio_ctx(), which will also attempt to +traverse job->nodes, potentially finding entries that were freed +on previous iterations. + +To avoid this situation, update job->nodes head on each iteration to +ensure that already freed entries are no longer linked to the list. 
+ +RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1746631 +Signed-off-by: Sergio Lopez +Cc: qemu-stable@nongnu.org +Signed-off-by: Max Reitz +Message-id: 20190911100316.32282-1-mreitz@redhat.com +Reviewed-by: Sergio Lopez +Signed-off-by: Max Reitz +(cherry picked from commit d876bf676f5e7c6aa9ac64555e48cba8734ecb2f) +Signed-off-by: Michael Roth +--- + blockjob.c | 17 +++++++++++++---- + 1 file changed, 13 insertions(+), 4 deletions(-) + +diff --git a/blockjob.c b/blockjob.c +index 20b7f557da..74abb97bfd 100644 +--- a/blockjob.c ++++ b/blockjob.c +@@ -186,14 +186,23 @@ static const BdrvChildRole child_job = { + + void block_job_remove_all_bdrv(BlockJob *job) + { +- GSList *l; +- for (l = job->nodes; l; l = l->next) { ++ /* ++ * bdrv_root_unref_child() may reach child_job_[can_]set_aio_ctx(), ++ * which will also traverse job->nodes, so consume the list one by ++ * one to make sure that such a concurrent access does not attempt ++ * to process an already freed BdrvChild. ++ */ ++ while (job->nodes) { ++ GSList *l = job->nodes; + BdrvChild *c = l->data; ++ ++ job->nodes = l->next; ++ + bdrv_op_unblock_all(c->bs, job->blocker); + bdrv_root_unref_child(c); ++ ++ g_slist_free_1(l); + } +- g_slist_free(job->nodes); +- job->nodes = NULL; + } + + bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs) +-- +2.23.0 diff --git a/coroutine-Add-qemu_co_mutex_assert_locked.patch b/coroutine-Add-qemu_co_mutex_assert_locked.patch new file mode 100644 index 0000000..fb1f258 --- /dev/null +++ b/coroutine-Add-qemu_co_mutex_assert_locked.patch @@ -0,0 +1,50 @@ +From e9bb3d942e268a19e03fc5d404586d2ed1564282 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 24 Oct 2019 16:26:57 +0200 +Subject: [PATCH] coroutine: Add qemu_co_mutex_assert_locked() + +Some functions require that the caller holds a certain CoMutex for them +to operate correctly. Add a function so that they can assert the lock is +really held. 
+ +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Tested-by: Michael Weiser +Reviewed-by: Michael Weiser +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Denis V. Lunev +Reviewed-by: Max Reitz +(cherry picked from commit 944f3d5dd216fcd8cb007eddd4f82dced0a15b3d) +Signed-off-by: Michael Roth +--- + include/qemu/coroutine.h | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h +index 9801e7f5a4..f4843b5f59 100644 +--- a/include/qemu/coroutine.h ++++ b/include/qemu/coroutine.h +@@ -167,6 +167,21 @@ void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex); + */ + void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex); + ++/** ++ * Assert that the current coroutine holds @mutex. ++ */ ++static inline coroutine_fn void qemu_co_mutex_assert_locked(CoMutex *mutex) ++{ ++ /* ++ * mutex->holder doesn't need any synchronisation if the assertion holds ++ * true because the mutex protects it. If it doesn't hold true, we still ++ * don't mind if another thread takes or releases mutex behind our back, ++ * because the condition will be false no matter whether we read NULL or ++ * the pointer for any other coroutine. ++ */ ++ assert(atomic_read(&mutex->locked) && ++ mutex->holder == qemu_coroutine_self()); ++} + + /** + * CoQueues are a mechanism to queue coroutines in order to continue executing +-- +2.23.0 diff --git a/dma-helpers-ensure-AIO-callback-is-invoked-after-can.patch b/dma-helpers-ensure-AIO-callback-is-invoked-after-can.patch new file mode 100644 index 0000000..c61c9fd --- /dev/null +++ b/dma-helpers-ensure-AIO-callback-is-invoked-after-can.patch @@ -0,0 +1,79 @@ +From fbde196c30e4797a51bda046ba514b187963d4ba Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 29 Jul 2019 23:34:16 +0200 +Subject: [PATCH] dma-helpers: ensure AIO callback is invoked after + cancellation + +dma_aio_cancel unschedules the BH if there is one, which corresponds +to the reschedule_dma case of dma_blk_cb. 
This can stall the DMA +permanently, because dma_complete will never get invoked and therefore +nobody will ever invoke the original AIO callback in dbs->common.cb. + +Fix this by invoking the callback (which is ensured to happen after +a bdrv_aio_cancel_async, or done manually in the dbs->bh case), and +add assertions to check that the DMA state machine is indeed waiting +for dma_complete or reschedule_dma, but never both. + +Reported-by: John Snow +Signed-off-by: Paolo Bonzini +Message-id: 20190729213416.1972-1-pbonzini@redhat.com +Signed-off-by: John Snow +(cherry picked from commit 539343c0a47e19d5dd64d846d64d084d9793681f) +Signed-off-by: Michael Roth +--- + dma-helpers.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +diff --git a/dma-helpers.c b/dma-helpers.c +index 2d7e02d35e..d3871dc61e 100644 +--- a/dma-helpers.c ++++ b/dma-helpers.c +@@ -90,6 +90,7 @@ static void reschedule_dma(void *opaque) + { + DMAAIOCB *dbs = (DMAAIOCB *)opaque; + ++ assert(!dbs->acb && dbs->bh); + qemu_bh_delete(dbs->bh); + dbs->bh = NULL; + dma_blk_cb(dbs, 0); +@@ -111,15 +112,12 @@ static void dma_complete(DMAAIOCB *dbs, int ret) + { + trace_dma_complete(dbs, ret, dbs->common.cb); + ++ assert(!dbs->acb && !dbs->bh); + dma_blk_unmap(dbs); + if (dbs->common.cb) { + dbs->common.cb(dbs->common.opaque, ret); + } + qemu_iovec_destroy(&dbs->iov); +- if (dbs->bh) { +- qemu_bh_delete(dbs->bh); +- dbs->bh = NULL; +- } + qemu_aio_unref(dbs); + } + +@@ -179,14 +177,21 @@ static void dma_aio_cancel(BlockAIOCB *acb) + + trace_dma_aio_cancel(dbs); + ++ assert(!(dbs->acb && dbs->bh)); + if (dbs->acb) { ++ /* This will invoke dma_blk_cb. 
*/ + blk_aio_cancel_async(dbs->acb); ++ return; + } ++ + if (dbs->bh) { + cpu_unregister_map_client(dbs->bh); + qemu_bh_delete(dbs->bh); + dbs->bh = NULL; + } ++ if (dbs->common.cb) { ++ dbs->common.cb(dbs->common.opaque, -ECANCELED); ++ } + } + + static AioContext *dma_get_aio_context(BlockAIOCB *acb) +-- +2.23.0 diff --git a/hbitmap-handle-set-reset-with-zero-length.patch b/hbitmap-handle-set-reset-with-zero-length.patch new file mode 100644 index 0000000..b346a97 --- /dev/null +++ b/hbitmap-handle-set-reset-with-zero-length.patch @@ -0,0 +1,50 @@ +From c0b35d87de345bd3b59a44c604b247a0497f2fc0 Mon Sep 17 00:00:00 2001 +From: Vladimir Sementsov-Ogievskiy +Date: Fri, 11 Oct 2019 12:07:07 +0300 +Subject: [PATCH] hbitmap: handle set/reset with zero length + +Passing zero length to these functions leads to unpredicted results. +Zero-length set/reset may occur in active-mirror, on zero-length write +(which is unlikely, but not guaranteed to never happen). + +Let's just do nothing on zero-length request. 
+ +Signed-off-by: Vladimir Sementsov-Ogievskiy +Message-id: 20191011090711.19940-2-vsementsov@virtuozzo.com +Reviewed-by: Max Reitz +Cc: qemu-stable@nongnu.org +Signed-off-by: Max Reitz +(cherry picked from commit fed33bd175f663cc8c13f8a490a4f35a19756cfe) +Signed-off-by: Michael Roth +--- + util/hbitmap.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/util/hbitmap.c b/util/hbitmap.c +index 71c6ba2c52..c059313b9e 100644 +--- a/util/hbitmap.c ++++ b/util/hbitmap.c +@@ -387,6 +387,10 @@ void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count) + uint64_t first, n; + uint64_t last = start + count - 1; + ++ if (count == 0) { ++ return; ++ } ++ + trace_hbitmap_set(hb, start, count, + start >> hb->granularity, last >> hb->granularity); + +@@ -478,6 +482,10 @@ void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count) + uint64_t last = start + count - 1; + uint64_t gran = 1ULL << hb->granularity; + ++ if (count == 0) { ++ return; ++ } ++ + assert(QEMU_IS_ALIGNED(start, gran)); + assert(QEMU_IS_ALIGNED(count, gran) || (start + count == hb->orig_size)); + +-- +2.23.0 diff --git a/hw-arm-boot.c-Set-NSACR.-CP11-CP10-for-NS-kernel-boo.patch b/hw-arm-boot.c-Set-NSACR.-CP11-CP10-for-NS-kernel-boo.patch new file mode 100644 index 0000000..e7ca51a --- /dev/null +++ b/hw-arm-boot.c-Set-NSACR.-CP11-CP10-for-NS-kernel-boo.patch @@ -0,0 +1,47 @@ +From 220816989c1e3d490d293b8d7ac85dbc41a4c321 Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Fri, 20 Sep 2019 18:40:39 +0100 +Subject: [PATCH] hw/arm/boot.c: Set NSACR.{CP11,CP10} for NS kernel boots + +If we're booting a Linux kernel directly into Non-Secure +state on a CPU which has Secure state, then make sure we +set the NSACR CP11 and CP10 bits, so that Non-Secure is allowed +to access the FPU. Otherwise an AArch32 kernel will UNDEF as +soon as it tries to use the FPU. + +It used to not matter that we didn't do this until commit +fc1120a7f5f2d4b6, where we implemented actually honouring +these NSACR bits. 
+ +The problem only exists for CPUs where EL3 is AArch32; the +equivalent AArch64 trap bits are in CPTR_EL3 and are "0 to +not trap, 1 to trap", so the reset value of the register +permits NS access, unlike NSACR. + +Fixes: fc1120a7f5 +Fixes: https://bugs.launchpad.net/qemu/+bug/1844597 +Cc: qemu-stable@nongnu.org +Signed-off-by: Peter Maydell +Reviewed-by: Richard Henderson +Message-id: 20190920174039.3916-1-peter.maydell@linaro.org +(cherry picked from commit ece628fcf69cbbd4b3efb6fbd203af07609467a2) +Signed-off-by: Michael Roth +--- + hw/arm/boot.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/arm/boot.c b/hw/arm/boot.c +index c2b89b3bb9..fc4e021a38 100644 +--- a/hw/arm/boot.c ++++ b/hw/arm/boot.c +@@ -754,6 +754,8 @@ static void do_cpu_reset(void *opaque) + (cs != first_cpu || !info->secure_board_setup)) { + /* Linux expects non-secure state */ + env->cp15.scr_el3 |= SCR_NS; ++ /* Set NSACR.{CP11,CP10} so NS can access the FPU */ ++ env->cp15.nsacr |= 3 << 10; + } + } + +-- +2.23.0 diff --git a/hw-core-loader-Fix-possible-crash-in-rom_copy.patch b/hw-core-loader-Fix-possible-crash-in-rom_copy.patch new file mode 100644 index 0000000..770f12b --- /dev/null +++ b/hw-core-loader-Fix-possible-crash-in-rom_copy.patch @@ -0,0 +1,45 @@ +From aae0faa5d3bee91c66dc4c1543190f55a242771e Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 25 Sep 2019 14:16:43 +0200 +Subject: [PATCH] hw/core/loader: Fix possible crash in rom_copy() + +Both, "rom->addr" and "addr" are derived from the binary image +that can be loaded with the "-kernel" parameter. The code in +rom_copy() then calculates: + + d = dest + (rom->addr - addr); + +and uses "d" as destination in a memcpy() some lines later. Now with +bad kernel images, it is possible that rom->addr is smaller than addr, +thus "rom->addr - addr" gets negative and the memcpy() then tries to +copy contents from the image to a bad memory location.
This could +maybe be used to inject code from a kernel image into the QEMU binary, +so we better fix it with an additional sanity check here. + +Cc: qemu-stable@nongnu.org +Reported-by: Guangming Liu +Buglink: https://bugs.launchpad.net/qemu/+bug/1844635 +Message-Id: <20190925130331.27825-1-thuth@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Thomas Huth +(cherry picked from commit e423455c4f23a1a828901c78fe6d03b7dde79319) +Signed-off-by: Michael Roth +--- + hw/core/loader.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/core/loader.c b/hw/core/loader.c +index 425bf69a99..838a34174a 100644 +--- a/hw/core/loader.c ++++ b/hw/core/loader.c +@@ -1242,7 +1242,7 @@ int rom_copy(uint8_t *dest, hwaddr addr, size_t size) + if (rom->addr + rom->romsize < addr) { + continue; + } +- if (rom->addr > end) { ++ if (rom->addr > end || rom->addr < addr) { + break; + } + +-- +2.23.0 diff --git a/libvhost-user-fix-SLAVE_SEND_FD-handling.patch b/libvhost-user-fix-SLAVE_SEND_FD-handling.patch new file mode 100644 index 0000000..71cbf7b --- /dev/null +++ b/libvhost-user-fix-SLAVE_SEND_FD-handling.patch @@ -0,0 +1,42 @@ +From 28a9a3558a427493049723fff390add7026653eb Mon Sep 17 00:00:00 2001 +From: Johannes Berg +Date: Tue, 3 Sep 2019 23:04:22 +0300 +Subject: [PATCH] libvhost-user: fix SLAVE_SEND_FD handling + +It doesn't look like this could possibly work properly since +VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD is defined to 10, but the +dev->protocol_features has a bitmap. I suppose the peer this +was tested with also supported VHOST_USER_PROTOCOL_F_LOG_SHMFD, +in which case the test would always be false, but nevertheless +the code seems wrong. + +Use has_feature() to fix this. + +Fixes: d84599f56c82 ("libvhost-user: support host notifier") +Signed-off-by: Johannes Berg +Message-Id: <20190903200422.11693-1-johannes@sipsolutions.net> +Reviewed-by: Tiwei Bie +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 8726b70b449896f1211f869ec4f608904f027207) +Signed-off-by: Michael Roth +--- + contrib/libvhost-user/libvhost-user.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c +index 4b36e35a82..cb5f5770e4 100644 +--- a/contrib/libvhost-user/libvhost-user.c ++++ b/contrib/libvhost-user/libvhost-user.c +@@ -1097,7 +1097,8 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, + + vmsg.fd_num = fd_num; + +- if ((dev->protocol_features & VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) == 0) { ++ if (!has_feature(dev->protocol_features, ++ VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD)) { + return false; + } + +-- +2.23.0 diff --git a/make-release-pull-in-edk2-submodules-so-we-can-build.patch b/make-release-pull-in-edk2-submodules-so-we-can-build.patch new file mode 100644 index 0000000..70bcc86 --- /dev/null +++ b/make-release-pull-in-edk2-submodules-so-we-can-build.patch @@ -0,0 +1,60 @@ +From c5c9b1362d1652a9d0f79f6d9ae2f80d4b5fe432 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 12 Sep 2019 18:12:01 -0500 +Subject: [PATCH] make-release: pull in edk2 submodules so we can build it from + tarballs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The `make efi` target added by 536d2173 is built from the roms/edk2 +submodule, which in turn relies on additional submodules nested under +roms/edk2. + +The make-release script currently only pulls in top-level submodules, +so these nested submodules are missing in the resulting tarball. + +We could try to address this situation more generally by recursively +pulling in all submodules, but this doesn't necessarily ensure the +end-result will build properly (this case also required other changes). 
+ +Additionally, due to the nature of submodules, we may not always have +control over how these sorts of things are dealt with, so for now we +continue to handle it on a case-by-case in the make-release script. + +Cc: Laszlo Ersek +Cc: Bruce Rogers +Cc: qemu-stable@nongnu.org # v4.1.0 +Reported-by: Bruce Rogers +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Philippe Mathieu-Daudé +Signed-off-by: Michael Roth +Message-Id: <20190912231202.12327-2-mdroth@linux.vnet.ibm.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 45c61c6c23918e3b05ed9ecac5b2328ebae5f774) +Signed-off-by: Michael Roth +--- + scripts/make-release | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/scripts/make-release b/scripts/make-release +index b4af9c9e52..a2a8cda33c 100755 +--- a/scripts/make-release ++++ b/scripts/make-release +@@ -20,6 +20,14 @@ git checkout "v${version}" + git submodule update --init + (cd roms/seabios && git describe --tags --long --dirty > .version) + (cd roms/skiboot && ./make_version.sh > .version) ++# Fetch edk2 submodule's submodules, since it won't have access to them via ++# the tarball later. ++# ++# A more uniform way to handle this sort of situation would be nice, but we ++# don't necessarily have much control over how a submodule handles its ++# submodule dependencies, so we continue to handle these on a case-by-case ++# basis for now. 
++(cd roms/edk2 && git submodule update --init) + popd + tar --exclude=.git -cjf ${destination}.tar.bz2 ${destination} + rm -rf ${destination} +-- +2.23.0 diff --git a/mirror-Keep-mirror_top_bs-drained-after-dropping-per.patch b/mirror-Keep-mirror_top_bs-drained-after-dropping-per.patch new file mode 100644 index 0000000..52f07f9 --- /dev/null +++ b/mirror-Keep-mirror_top_bs-drained-after-dropping-per.patch @@ -0,0 +1,52 @@ +From e092a17d3825a8f2c93cb429aaa5d857b579b64c Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 22 Jul 2019 17:44:27 +0200 +Subject: [PATCH] mirror: Keep mirror_top_bs drained after dropping permissions + +mirror_top_bs is currently implicitly drained through its connection to +the source or the target node. However, the drain section for target_bs +ends early after moving mirror_top_bs from src to target_bs, so that +requests can already be restarted while mirror_top_bs is still present +in the chain, but has dropped all permissions and therefore runs into an +assertion failure like this: + + qemu-system-x86_64: block/io.c:1634: bdrv_co_write_req_prepare: + Assertion `child->perm & BLK_PERM_WRITE' failed. + +Keep mirror_top_bs drained until all graph changes have completed. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Reviewed-by: Max Reitz +(cherry picked from commit d2da5e288a2e71e82866c8fdefd41b5727300124) +Signed-off-by: Michael Roth +--- + block/mirror.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/block/mirror.c b/block/mirror.c +index 0e3f7923cf..681b305de6 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -661,7 +661,10 @@ static int mirror_exit_common(Job *job) + s->target = NULL; + + /* We don't access the source any more. Dropping any WRITE/RESIZE is +- * required before it could become a backing file of target_bs. */ ++ * required before it could become a backing file of target_bs. 
Not having ++ * these permissions any more means that we can't allow any new requests on ++ * mirror_top_bs from now on, so keep it drained. */ ++ bdrv_drained_begin(mirror_top_bs); + bs_opaque->stop = true; + bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing, + &error_abort); +@@ -729,6 +732,7 @@ static int mirror_exit_common(Job *job) + bs_opaque->job = NULL; + + bdrv_drained_end(src); ++ bdrv_drained_end(mirror_top_bs); + s->in_drain = false; + bdrv_unref(mirror_top_bs); + bdrv_unref(src); +-- +2.23.0 diff --git a/pc-Don-t-make-die-id-mandatory-unless-necessary.patch b/pc-Don-t-make-die-id-mandatory-unless-necessary.patch new file mode 100644 index 0000000..c51b40f --- /dev/null +++ b/pc-Don-t-make-die-id-mandatory-unless-necessary.patch @@ -0,0 +1,102 @@ +From 7ebcd375ade505358c1c45542de22f188c599bdd Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Fri, 16 Aug 2019 14:07:50 -0300 +Subject: [PATCH] pc: Don't make die-id mandatory unless necessary + +We have this issue reported when using libvirt to hotplug CPUs: +https://bugzilla.redhat.com/show_bug.cgi?id=1741451 + +Basically, libvirt is not copying die-id from +query-hotpluggable-cpus, but die-id is now mandatory. + +We could blame libvirt and say it is not following the documented +interface, because we have this buried in the QAPI schema +documentation: + +> Note: currently there are 5 properties that could be present +> but management should be prepared to pass through other +> properties with device_add command to allow for future +> interface extension. This also requires the filed names to be kept in +> sync with the properties passed to -device/device_add. + +But I don't think this would be reasonable of us. We can just +make QEMU more flexible and let die-id be omitted when there's +no ambiguity. This will allow us to keep compatibility with +existing libvirt versions. + +Test case included to ensure we don't break this again.
+ +Fixes: commit 176d2cda0dee ("i386/cpu: Consolidate die-id validity in smp context") +Signed-off-by: Eduardo Habkost +Message-Id: <20190816170750.23910-1-ehabkost@redhat.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit fea374e7c8079563bca7c8fac895c6a880f76adc) +Signed-off-by: Michael Roth +--- + hw/i386/pc.c | 8 ++++++ + tests/acceptance/pc_cpu_hotplug_props.py | 35 ++++++++++++++++++++++++ + 2 files changed, 43 insertions(+) + create mode 100644 tests/acceptance/pc_cpu_hotplug_props.py + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 549c437050..947f81070f 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -2403,6 +2403,14 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, + int max_socket = (ms->smp.max_cpus - 1) / + smp_threads / smp_cores / pcms->smp_dies; + ++ /* ++ * die-id was optional in QEMU 4.0 and older, so keep it optional ++ * if there's only one die per socket. ++ */ ++ if (cpu->die_id < 0 && pcms->smp_dies == 1) { ++ cpu->die_id = 0; ++ } ++ + if (cpu->socket_id < 0) { + error_setg(errp, "CPU socket-id is not set"); + return; +diff --git a/tests/acceptance/pc_cpu_hotplug_props.py b/tests/acceptance/pc_cpu_hotplug_props.py +new file mode 100644 +index 0000000000..08b7e632c6 +--- /dev/null ++++ b/tests/acceptance/pc_cpu_hotplug_props.py +@@ -0,0 +1,35 @@ ++# ++# Ensure CPU die-id can be omitted on -device ++# ++# Copyright (c) 2019 Red Hat Inc ++# ++# Author: ++# Eduardo Habkost ++# ++# This library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2 of the License, or (at your option) any later version. ++# ++# This library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. 
++# ++# You should have received a copy of the GNU Lesser General Public ++# License along with this library; if not, see . ++# ++ ++from avocado_qemu import Test ++ ++class OmittedCPUProps(Test): ++ """ ++ :avocado: tags=arch:x86_64 ++ """ ++ def test_no_die_id(self): ++ self.vm.add_args('-nodefaults', '-S') ++ self.vm.add_args('-smp', '1,sockets=2,cores=2,threads=2,maxcpus=8') ++ self.vm.add_args('-cpu', 'qemu64') ++ self.vm.add_args('-device', 'qemu64-x86_64-cpu,socket-id=1,core-id=0,thread-id=0') ++ self.vm.launch() ++ self.assertEquals(len(self.vm.command('query-cpus')), 2) +-- +2.23.0 diff --git a/pr-manager-Fix-invalid-g_free-crash-bug.patch b/pr-manager-Fix-invalid-g_free-crash-bug.patch new file mode 100644 index 0000000..b171cdb --- /dev/null +++ b/pr-manager-Fix-invalid-g_free-crash-bug.patch @@ -0,0 +1,39 @@ +From 57fdf4a13ff16d9d48a43f02a5e7b42e3d264f83 Mon Sep 17 00:00:00 2001 +From: Markus Armbruster +Date: Thu, 22 Aug 2019 15:38:46 +0200 +Subject: [PATCH] pr-manager: Fix invalid g_free() crash bug +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +pr_manager_worker() passes its @opaque argument to g_free(). Wrong; +it points to pr_manager_worker()'s automatic @data. Broken when +commit 2f3a7ab39be converted @data from heap- to stack-allocated. Fix +by deleting the g_free(). 
+ +Fixes: 2f3a7ab39bec4ba8022dc4d42ea641165b004e3e +Cc: qemu-stable@nongnu.org +Signed-off-by: Markus Armbruster +Reviewed-by: Philippe Mathieu-Daudé +Acked-by: Paolo Bonzini +Signed-off-by: Kevin Wolf +(cherry picked from commit 6b9d62c2a9e83bbad73fb61406f0ff69b46ff6f3) +Signed-off-by: Michael Roth +--- + scsi/pr-manager.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/scsi/pr-manager.c b/scsi/pr-manager.c +index ee43663576..0c866e8698 100644 +--- a/scsi/pr-manager.c ++++ b/scsi/pr-manager.c +@@ -39,7 +39,6 @@ static int pr_manager_worker(void *opaque) + int fd = data->fd; + int r; + +- g_free(data); + trace_pr_manager_run(fd, hdr->cmdp[0], hdr->cmdp[1]); + + /* The reference was taken in pr_manager_execute. */ +-- +2.23.0 diff --git a/qcow2-Fix-QCOW2_COMPRESSED_SECTOR_MASK.patch b/qcow2-Fix-QCOW2_COMPRESSED_SECTOR_MASK.patch new file mode 100644 index 0000000..f2a4e5c --- /dev/null +++ b/qcow2-Fix-QCOW2_COMPRESSED_SECTOR_MASK.patch @@ -0,0 +1,35 @@ +From 405deba14f6b61b9c557484b46e863308c8cf373 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Mon, 28 Oct 2019 17:18:40 +0100 +Subject: [PATCH] qcow2: Fix QCOW2_COMPRESSED_SECTOR_MASK + +Masks for L2 table entries should have 64 bit. 
+ +Fixes: b6c246942b14d3e0dec46a6c5868ed84e7dbea19 +Buglink: https://bugs.launchpad.net/qemu/+bug/1850000 +Cc: qemu-stable@nongnu.org +Signed-off-by: Max Reitz +Message-id: 20191028161841.1198-2-mreitz@redhat.com +Reviewed-by: Alberto Garcia +Signed-off-by: Max Reitz +(cherry picked from commit 24552feb6ae2f615b76c2b95394af43901f75046) +Signed-off-by: Michael Roth +--- + block/qcow2.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/qcow2.h b/block/qcow2.h +index fc1b0d3c1e..359197f89f 100644 +--- a/block/qcow2.h ++++ b/block/qcow2.h +@@ -77,7 +77,7 @@ + + /* Defined in the qcow2 spec (compressed cluster descriptor) */ + #define QCOW2_COMPRESSED_SECTOR_SIZE 512U +-#define QCOW2_COMPRESSED_SECTOR_MASK (~(QCOW2_COMPRESSED_SECTOR_SIZE - 1)) ++#define QCOW2_COMPRESSED_SECTOR_MASK (~(QCOW2_COMPRESSED_SECTOR_SIZE - 1ULL)) + + /* Must be at least 2 to cover COW */ + #define MIN_L2_CACHE_SIZE 2 /* cache entries */ +-- +2.23.0 diff --git a/qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch b/qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch new file mode 100644 index 0000000..b4c2580 --- /dev/null +++ b/qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch @@ -0,0 +1,71 @@ +From 416a692e51b8b582407e30046ddcffbbe52ecf77 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 24 Oct 2019 16:26:58 +0200 +Subject: [PATCH] qcow2: Fix corruption bug in + qcow2_detect_metadata_preallocation() + +qcow2_detect_metadata_preallocation() calls qcow2_get_refcount() which +requires s->lock to be taken to protect its accesses to the refcount +table and refcount blocks. However, nothing in this code path actually +took the lock. This could cause the same cache entry to be used by two +requests at the same time, for different tables at different offsets, +resulting in image corruption. 
+ +As it would be preferable to base the detection on consistent data (even +though it's just heuristics), let's take the lock not only around the +qcow2_get_refcount() calls, but around the whole function. + +This patch takes the lock in qcow2_co_block_status() earlier and asserts +in qcow2_detect_metadata_preallocation() that we hold the lock. + +Fixes: 69f47505ee66afaa513305de0c1895a224e52c45 +Cc: qemu-stable@nongnu.org +Reported-by: Michael Weiser +Signed-off-by: Kevin Wolf +Tested-by: Michael Weiser +Reviewed-by: Michael Weiser +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Max Reitz +(cherry picked from commit 5e9785505210e2477e590e61b1ab100d0ec22b01) +Signed-off-by: Michael Roth +--- + block/qcow2-refcount.c | 2 ++ + block/qcow2.c | 3 ++- + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c +index ef965d7895..0d64bf5a5e 100644 +--- a/block/qcow2-refcount.c ++++ b/block/qcow2-refcount.c +@@ -3455,6 +3455,8 @@ int qcow2_detect_metadata_preallocation(BlockDriverState *bs) + int64_t i, end_cluster, cluster_count = 0, threshold; + int64_t file_length, real_allocation, real_clusters; + ++ qemu_co_mutex_assert_locked(&s->lock); ++ + file_length = bdrv_getlength(bs->file->bs); + if (file_length < 0) { + return file_length; +diff --git a/block/qcow2.c b/block/qcow2.c +index 865839682c..c0f5439dc8 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -1899,6 +1899,8 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, + unsigned int bytes; + int status = 0; + ++ qemu_co_mutex_lock(&s->lock); ++ + if (!s->metadata_preallocation_checked) { + ret = qcow2_detect_metadata_preallocation(bs); + s->metadata_preallocation = (ret == 1); +@@ -1906,7 +1908,6 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, + } + + bytes = MIN(INT_MAX, count); +- qemu_co_mutex_lock(&s->lock); + ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset); + 
qemu_co_mutex_unlock(&s->lock); + if (ret < 0) { +-- +2.23.0 diff --git a/qcow2-Fix-the-calculation-of-the-maximum-L2-cache-si.patch b/qcow2-Fix-the-calculation-of-the-maximum-L2-cache-si.patch new file mode 100644 index 0000000..be2c3c7 --- /dev/null +++ b/qcow2-Fix-the-calculation-of-the-maximum-L2-cache-si.patch @@ -0,0 +1,58 @@ +From c9ffb12754b1575babfef45168b6e1b1af80a95f Mon Sep 17 00:00:00 2001 +From: Alberto Garcia +Date: Fri, 16 Aug 2019 15:17:42 +0300 +Subject: [PATCH] qcow2: Fix the calculation of the maximum L2 cache size + +The size of the qcow2 L2 cache defaults to 32 MB, which can be easily +larger than the maximum amount of L2 metadata that the image can have. +For example: with 64 KB clusters the user would need a qcow2 image +with a virtual size of 256 GB in order to have 32 MB of L2 metadata. + +Because of that, since commit b749562d9822d14ef69c9eaa5f85903010b86c30 +we forbid the L2 cache to become larger than the maximum amount of L2 +metadata for the image, calculated using this formula: + + uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8); + +The problem with this formula is that the result should be rounded up +to the cluster size because an L2 table on disk always takes one full +cluster. + +For example, a 1280 MB qcow2 image with 64 KB clusters needs exactly +160 KB of L2 metadata, but we need 192 KB on disk (3 clusters) even if +the last 32 KB of those are not going to be used. + +However QEMU rounds the numbers down and only creates 2 cache tables +(128 KB), which is not enough for the image. + +A quick test doing 4KB random writes on a 1280 MB image gives me +around 500 IOPS, while with the correct cache size I get 16K IOPS. 
+ +Cc: qemu-stable@nongnu.org +Signed-off-by: Alberto Garcia +Signed-off-by: Kevin Wolf +(cherry picked from commit b70d08205b2e4044c529eefc21df2c8ab61b473b) +Signed-off-by: Michael Roth +--- + block/qcow2.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/block/qcow2.c b/block/qcow2.c +index 039bdc2f7e..865839682c 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -826,7 +826,11 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, + bool l2_cache_entry_size_set; + int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size; + uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE; +- uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8); ++ uint64_t max_l2_entries = DIV_ROUND_UP(virtual_disk_size, s->cluster_size); ++ /* An L2 table is always one cluster in size so the max cache size ++ * should be a multiple of the cluster size. */ ++ uint64_t max_l2_cache = ROUND_UP(max_l2_entries * sizeof(uint64_t), ++ s->cluster_size); + + combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE); + l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE); +-- +2.23.0 diff --git a/qemu.spec b/qemu.spec index d26cca2..5796df5 100644 --- a/qemu.spec +++ b/qemu.spec @@ -61,6 +61,46 @@ Patch0048: pcie-Add-pcie-root-port-fast-plug-unplug-feature.patch Patch0049: pcie-Compat-with-devices-which-do-not-support-Link-W.patch Patch0050: aio-wait-delegate-polling-of-main-AioContext-if-BQL-not-held.patch Patch0051: async-use-explicit-memory-barriers.patch +Patch0052: dma-helpers-ensure-AIO-callback-is-invoked-after-can.patch +Patch0053: Revert-ide-ahci-Check-for-ECANCELED-in-aio-callbacks.patch +Patch0054: pc-Don-t-make-die-id-mandatory-unless-necessary.patch +Patch0055: block-file-posix-Reduce-xfsctl-use.patch +Patch0056: pr-manager-Fix-invalid-g_free-crash-bug.patch +Patch0057: x86-do-not-advertise-die-id-in-query-hotpluggbale-cp.patch +Patch0058: vpc-Return-0-from-vpc_co_create-on-success.patch 
+Patch0059: target-arm-Free-TCG-temps-in-trans_VMOV_64_sp.patch +Patch0060: target-arm-Don-t-abort-on-M-profile-exception-return.patch +Patch0061: libvhost-user-fix-SLAVE_SEND_FD-handling.patch +Patch0062: qcow2-Fix-the-calculation-of-the-maximum-L2-cache-si.patch +Patch0063: block-nfs-tear-down-aio-before-nfs_close.patch +Patch0064: blockjob-update-nodes-head-while-removing-all-bdrv.patch +Patch0065: block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch +Patch0066: coroutine-Add-qemu_co_mutex_assert_locked.patch +Patch0067: qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch +Patch0068: hw-arm-boot.c-Set-NSACR.-CP11-CP10-for-NS-kernel-boo.patch +Patch0069: make-release-pull-in-edk2-submodules-so-we-can-build.patch +Patch0070: roms-Makefile.edk2-don-t-pull-in-submodules-when-bui.patch +Patch0071: block-snapshot-Restrict-set-of-snapshot-nodes.patch +Patch0072: vhost-user-save-features-if-the-char-dev-is-closed.patch +Patch0073: hw-core-loader-Fix-possible-crash-in-rom_copy.patch +Patch0074: ui-Fix-hanging-up-Cocoa-display-on-macOS-10.15-Catal.patch +Patch0075: virtio-new-post_load-hook.patch +Patch0076: virtio-net-prevent-offloads-reset-on-migration.patch +Patch0077: util-hbitmap-strict-hbitmap_reset.patch +Patch0078: hbitmap-handle-set-reset-with-zero-length.patch +Patch0079: target-arm-Allow-reading-flags-from-FPSCR-for-M-prof.patch +Patch0080: scsi-lsi-exit-infinite-loop-while-executing-script-C.patch +Patch0081: virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch +Patch0082: qcow2-Fix-QCOW2_COMPRESSED_SECTOR_MASK.patch +Patch0083: util-iov-introduce-qemu_iovec_init_extended.patch +Patch0084: util-iov-improve-qemu_iovec_is_zero.patch +Patch0085: block-io-refactor-padding.patch +Patch0086: block-Make-wait-mark-serialising-requests-public.patch +Patch0087: block-Add-bdrv_co_get_self_request.patch +Patch0088: block-file-posix-Let-post-EOF-fallocate-serialize.patch +Patch0089: block-posix-Always-allocate-the-first-block.patch +Patch0090: 
block-create-Do-not-abort-if-a-block-driver-is-not-a.patch +Patch0091: mirror-Keep-mirror_top_bs-drained-after-dropping-per.patch BuildRequires: flex @@ -407,6 +447,9 @@ getent passwd qemu >/dev/null || \ %endif %changelog +* Fri Apr 17 2020 Huawei Technologies Co., Ltd. +- backport patch bundles from qemu stable v4.1.1 + * Thu Apr 16 2020 Huawei Technologies Co., Ltd. - aio-wait: delegate polling of main AioContext if BQL not held - async: use explicit memory barriers diff --git a/roms-Makefile.edk2-don-t-pull-in-submodules-when-bui.patch b/roms-Makefile.edk2-don-t-pull-in-submodules-when-bui.patch new file mode 100644 index 0000000..00e6726 --- /dev/null +++ b/roms-Makefile.edk2-don-t-pull-in-submodules-when-bui.patch @@ -0,0 +1,54 @@ +From fc5afb1a9230fe21d76bcef527b0d3cee90a2cd3 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 12 Sep 2019 18:12:02 -0500 +Subject: [PATCH] roms/Makefile.edk2: don't pull in submodules when building + from tarball +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Currently the `make efi` target pulls submodules nested under the +roms/edk2 submodule as dependencies. However, when we attempt to build +from a tarball this fails since we are no longer in a git tree. + +A preceding patch will pre-populate these submodules in the tarball, +so assume this build dependency is only needed when building from a +git tree. 
+ +Cc: Laszlo Ersek +Cc: Bruce Rogers +Cc: qemu-stable@nongnu.org # v4.1.0 +Reported-by: Bruce Rogers +Reviewed-by: Laszlo Ersek +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Philippe Mathieu-Daudé +Signed-off-by: Michael Roth +Message-Id: <20190912231202.12327-3-mdroth@linux.vnet.ibm.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit f3e330e3c319160ac04954399b5a10afc965098c) +Signed-off-by: Michael Roth +--- + roms/Makefile.edk2 | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/roms/Makefile.edk2 b/roms/Makefile.edk2 +index c2f2ff59d5..33a074d3a4 100644 +--- a/roms/Makefile.edk2 ++++ b/roms/Makefile.edk2 +@@ -46,8 +46,13 @@ all: $(foreach flashdev,$(flashdevs),../pc-bios/edk2-$(flashdev).fd.bz2) \ + # files. + .INTERMEDIATE: $(foreach flashdev,$(flashdevs),../pc-bios/edk2-$(flashdev).fd) + ++# Fetch edk2 submodule's submodules. If it is not in a git tree, assume ++# we're building from a tarball and that they've already been fetched by ++# make-release/tarball scripts. + submodules: +- cd edk2 && git submodule update --init --force ++ if test -d edk2/.git; then \ ++ cd edk2 && git submodule update --init --force; \ ++ fi + + # See notes on the ".NOTPARALLEL" target and the "+" indicator in + # "tests/uefi-test-tools/Makefile". +-- +2.23.0 diff --git a/scsi-lsi-exit-infinite-loop-while-executing-script-C.patch b/scsi-lsi-exit-infinite-loop-while-executing-script-C.patch new file mode 100644 index 0000000..5d20a9f --- /dev/null +++ b/scsi-lsi-exit-infinite-loop-while-executing-script-C.patch @@ -0,0 +1,104 @@ +From 051c9b3cbcb4beb42a6ed017c2146ec3e7a754fb Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 14 Aug 2019 17:35:21 +0530 +Subject: [PATCH] scsi: lsi: exit infinite loop while executing script + (CVE-2019-12068) + +When executing script in lsi_execute_script(), the LSI scsi adapter +emulator advances 's->dsp' index to read next opcode. This can lead +to an infinite loop if the next opcode is empty. 
Move the existing +loop exit after 10k iterations so that it covers no-op opcodes as +well. + +Reported-by: Bugs SysSec +Signed-off-by: Paolo Bonzini +Signed-off-by: Prasad J Pandit +Signed-off-by: Paolo Bonzini +(cherry picked from commit de594e47659029316bbf9391efb79da0a1a08e08) +Signed-off-by: Michael Roth +--- + hw/scsi/lsi53c895a.c | 41 +++++++++++++++++++++++++++-------------- + 1 file changed, 27 insertions(+), 14 deletions(-) + +diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c +index 10468c1ec1..72f7b59ab5 100644 +--- a/hw/scsi/lsi53c895a.c ++++ b/hw/scsi/lsi53c895a.c +@@ -185,6 +185,9 @@ static const char *names[] = { + /* Flag set if this is a tagged command. */ + #define LSI_TAG_VALID (1 << 16) + ++/* Maximum instructions to process. */ ++#define LSI_MAX_INSN 10000 ++ + typedef struct lsi_request { + SCSIRequest *req; + uint32_t tag; +@@ -1132,7 +1135,21 @@ static void lsi_execute_script(LSIState *s) + + s->istat1 |= LSI_ISTAT1_SRUN; + again: +- insn_processed++; ++ if (++insn_processed > LSI_MAX_INSN) { ++ /* Some windows drivers make the device spin waiting for a memory ++ location to change. If we have been executed a lot of code then ++ assume this is the case and force an unexpected device disconnect. ++ This is apparently sufficient to beat the drivers into submission. ++ */ ++ if (!(s->sien0 & LSI_SIST0_UDC)) { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "lsi_scsi: inf. loop with UDC masked"); ++ } ++ lsi_script_scsi_interrupt(s, LSI_SIST0_UDC, 0); ++ lsi_disconnect(s); ++ trace_lsi_execute_script_stop(); ++ return; ++ } + insn = read_dword(s, s->dsp); + if (!insn) { + /* If we receive an empty opcode increment the DSP by 4 bytes +@@ -1569,19 +1586,7 @@ again: + } + } + } +- if (insn_processed > 10000 && s->waiting == LSI_NOWAIT) { +- /* Some windows drivers make the device spin waiting for a memory +- location to change. If we have been executed a lot of code then +- assume this is the case and force an unexpected device disconnect. 
+- This is apparently sufficient to beat the drivers into submission. +- */ +- if (!(s->sien0 & LSI_SIST0_UDC)) { +- qemu_log_mask(LOG_GUEST_ERROR, +- "lsi_scsi: inf. loop with UDC masked"); +- } +- lsi_script_scsi_interrupt(s, LSI_SIST0_UDC, 0); +- lsi_disconnect(s); +- } else if (s->istat1 & LSI_ISTAT1_SRUN && s->waiting == LSI_NOWAIT) { ++ if (s->istat1 & LSI_ISTAT1_SRUN && s->waiting == LSI_NOWAIT) { + if (s->dcntl & LSI_DCNTL_SSM) { + lsi_script_dma_interrupt(s, LSI_DSTAT_SSI); + } else { +@@ -1969,6 +1974,10 @@ static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val) + case 0x2f: /* DSP[24:31] */ + s->dsp &= 0x00ffffff; + s->dsp |= val << 24; ++ /* ++ * FIXME: if s->waiting != LSI_NOWAIT, this will only execute one ++ * instruction. Is this correct? ++ */ + if ((s->dmode & LSI_DMODE_MAN) == 0 + && (s->istat1 & LSI_ISTAT1_SRUN) == 0) + lsi_execute_script(s); +@@ -1987,6 +1996,10 @@ static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val) + break; + case 0x3b: /* DCNTL */ + s->dcntl = val & ~(LSI_DCNTL_PFF | LSI_DCNTL_STD); ++ /* ++ * FIXME: if s->waiting != LSI_NOWAIT, this will only execute one ++ * instruction. Is this correct? ++ */ + if ((val & LSI_DCNTL_STD) && (s->istat1 & LSI_ISTAT1_SRUN) == 0) + lsi_execute_script(s); + break; +-- +2.23.0 diff --git a/target-arm-Allow-reading-flags-from-FPSCR-for-M-prof.patch b/target-arm-Allow-reading-flags-from-FPSCR-for-M-prof.patch new file mode 100644 index 0000000..ca4b796 --- /dev/null +++ b/target-arm-Allow-reading-flags-from-FPSCR-for-M-prof.patch @@ -0,0 +1,41 @@ +From cdc6896659b85f7ed8f7552850312e55170de0c5 Mon Sep 17 00:00:00 2001 +From: Christophe Lyon +Date: Fri, 25 Oct 2019 11:57:11 +0200 +Subject: [PATCH] target/arm: Allow reading flags from FPSCR for M-profile + +rt==15 is a special case when reading the flags: it means the +destination is APSR. This patch avoids rejecting +vmrs apsr_nzcv, fpscr +as illegal instruction. 
+ +Cc: qemu-stable@nongnu.org +Signed-off-by: Christophe Lyon +Message-id: 20191025095711.10853-1-christophe.lyon@linaro.org +[PMM: updated the comment] +Reviewed-by: Peter Maydell +Signed-off-by: Peter Maydell +(cherry picked from commit 2529ab43b8a05534494704e803e0332d111d8b91) +Signed-off-by: Michael Roth +--- + target/arm/translate-vfp.inc.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c +index ef45cecbea..75406fd9db 100644 +--- a/target/arm/translate-vfp.inc.c ++++ b/target/arm/translate-vfp.inc.c +@@ -704,9 +704,10 @@ static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a) + if (arm_dc_feature(s, ARM_FEATURE_M)) { + /* + * The only M-profile VFP vmrs/vmsr sysreg is FPSCR. +- * Writes to R15 are UNPREDICTABLE; we choose to undef. ++ * Accesses to R15 are UNPREDICTABLE; we choose to undef. ++ * (FPSCR -> r15 is a special case which writes to the PSR flags.) + */ +- if (a->rt == 15 || a->reg != ARM_VFP_FPSCR) { ++ if (a->rt == 15 && (!a->l || a->reg != ARM_VFP_FPSCR)) { + return false; + } + } +-- +2.23.0 diff --git a/target-arm-Don-t-abort-on-M-profile-exception-return.patch b/target-arm-Don-t-abort-on-M-profile-exception-return.patch new file mode 100644 index 0000000..b6796e2 --- /dev/null +++ b/target-arm-Don-t-abort-on-M-profile-exception-return.patch @@ -0,0 +1,103 @@ +From 9027d3fba605d8f6093342ebe4a1da450d374630 Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Thu, 22 Aug 2019 14:15:34 +0100 +Subject: [PATCH] target/arm: Don't abort on M-profile exception return in + linux-user mode + +An attempt to do an exception-return (branch to one of the magic +addresses) in linux-user mode for M-profile should behave like +a normal branch, because linux-user mode is always going to be +in 'handler' mode. This used to work, but we broke it when we added +support for the M-profile security extension in commit d02a8698d7ae2bfed. 
+ +In that commit we allowed even handler-mode calls to magic return +values to be checked for and dealt with by causing an +EXCP_EXCEPTION_EXIT exception to be taken, because this is +needed for the FNC_RETURN return-from-non-secure-function-call +handling. For system mode we added a check in do_v7m_exception_exit() +to make any spurious calls from Handler mode behave correctly, but +forgot that linux-user mode would also be affected. + +How an attempted return-from-non-secure-function-call in linux-user +mode should be handled is not clear -- on real hardware it would +result in return to secure code (not to the Linux kernel) which +could then handle the error in any way it chose. For QEMU we take +the simple approach of treating this erroneous return the same way +it would be handled on a CPU without the security extensions -- +treat it as a normal branch. + +The upshot of all this is that for linux-user mode we should never +do any of the bx_excret magic, so the code change is simple. + +This ought to be a weird corner case that only affects broken guest +code (because Linux user processes should never be attempting to do +exception returns or NS function returns), except that the code that +assigns addresses in RAM for the process and stack in our linux-user +code does not attempt to avoid this magic address range, so +legitimate code attempting to return to a trampoline routine on the +stack can fall into this case. This change fixes those programs, +but we should also look at restricting the range of memory we +use for M-profile linux-user guests to the area that would be +real RAM in hardware. 
+ +Cc: qemu-stable@nongnu.org +Reported-by: Christophe Lyon +Reviewed-by: Richard Henderson +Signed-off-by: Peter Maydell +Message-id: 20190822131534.16602-1-peter.maydell@linaro.org +Fixes: https://bugs.launchpad.net/qemu/+bug/1840922 +Signed-off-by: Peter Maydell +(cherry picked from commit 5e5584c89f36b302c666bc6db535fd3f7ff35ad2) +Signed-off-by: Michael Roth +--- + target/arm/translate.c | 21 ++++++++++++++++++++- + 1 file changed, 20 insertions(+), 1 deletion(-) + +diff --git a/target/arm/translate.c b/target/arm/translate.c +index 7853462b21..24cb4ba075 100644 +--- a/target/arm/translate.c ++++ b/target/arm/translate.c +@@ -952,10 +952,27 @@ static inline void gen_bx(DisasContext *s, TCGv_i32 var) + store_cpu_field(var, thumb); + } + +-/* Set PC and Thumb state from var. var is marked as dead. ++/* ++ * Set PC and Thumb state from var. var is marked as dead. + * For M-profile CPUs, include logic to detect exception-return + * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC, + * and BX reg, and no others, and happens only for code in Handler mode. ++ * The Security Extension also requires us to check for the FNC_RETURN ++ * which signals a function return from non-secure state; this can happen ++ * in both Handler and Thread mode. ++ * To avoid having to do multiple comparisons in inline generated code, ++ * we make the check we do here loose, so it will match for EXC_RETURN ++ * in Thread mode. For system emulation do_v7m_exception_exit() checks ++ * for these spurious cases and returns without doing anything (giving ++ * the same behaviour as for a branch to a non-magic address). ++ * ++ * In linux-user mode it is unclear what the right behaviour for an ++ * attempted FNC_RETURN should be, because in real hardware this will go ++ * directly to Secure code (ie not the Linux kernel) which will then treat ++ * the error in any way it chooses. 
For QEMU we opt to make the FNC_RETURN ++ * attempt behave the way it would on a CPU without the security extension, ++ * which is to say "like a normal branch". That means we can simply treat ++ * all branches as normal with no magic address behaviour. + */ + static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var) + { +@@ -963,10 +980,12 @@ static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var) + * s->base.is_jmp that we need to do the rest of the work later. + */ + gen_bx(s, var); ++#ifndef CONFIG_USER_ONLY + if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) || + (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) { + s->base.is_jmp = DISAS_BX_EXCRET; + } ++#endif + } + + static inline void gen_bx_excret_final_code(DisasContext *s) +-- +2.23.0 diff --git a/target-arm-Free-TCG-temps-in-trans_VMOV_64_sp.patch b/target-arm-Free-TCG-temps-in-trans_VMOV_64_sp.patch new file mode 100644 index 0000000..a46232f --- /dev/null +++ b/target-arm-Free-TCG-temps-in-trans_VMOV_64_sp.patch @@ -0,0 +1,40 @@ +From 38fb634853ac6547326d9f88b9a068d9fc6b4ad4 Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Tue, 27 Aug 2019 13:19:31 +0100 +Subject: [PATCH] target/arm: Free TCG temps in trans_VMOV_64_sp() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The function neon_store_reg32() doesn't free the TCG temp that it +is passed, so the caller must do that. We got this right in most +places but forgot to free the TCG temps in trans_VMOV_64_sp(). 
+ +Cc: qemu-stable@nongnu.org +Signed-off-by: Peter Maydell +Reviewed-by: Richard Henderson +Reviewed-by: Philippe Mathieu-Daudé +Message-id: 20190827121931.26836-1-peter.maydell@linaro.org +(cherry picked from commit 342d27581bd3ecdb995e4fc55fcd383cf3242888) +Signed-off-by: Michael Roth +--- + target/arm/translate-vfp.inc.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c +index 092eb5ec53..ef45cecbea 100644 +--- a/target/arm/translate-vfp.inc.c ++++ b/target/arm/translate-vfp.inc.c +@@ -881,8 +881,10 @@ static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a) + /* gpreg to fpreg */ + tmp = load_reg(s, a->rt); + neon_store_reg32(tmp, a->vm); ++ tcg_temp_free_i32(tmp); + tmp = load_reg(s, a->rt2); + neon_store_reg32(tmp, a->vm + 1); ++ tcg_temp_free_i32(tmp); + } + + return true; +-- +2.23.0 diff --git a/ui-Fix-hanging-up-Cocoa-display-on-macOS-10.15-Catal.patch b/ui-Fix-hanging-up-Cocoa-display-on-macOS-10.15-Catal.patch new file mode 100644 index 0000000..30724ce --- /dev/null +++ b/ui-Fix-hanging-up-Cocoa-display-on-macOS-10.15-Catal.patch @@ -0,0 +1,62 @@ +From 6705b9344f8d6f134f612c2e35e87cdda5aa6284 Mon Sep 17 00:00:00 2001 +From: Hikaru Nishida +Date: Tue, 15 Oct 2019 10:07:34 +0900 +Subject: [PATCH] ui: Fix hanging up Cocoa display on macOS 10.15 (Catalina) + +macOS API documentation says that before applicationDidFinishLaunching +is called, any events will not be processed. However, some events are +fired before it is called in macOS Catalina. This causes deadlock of +iothread_lock in handleEvent while it will be released after the +app_started_sem is posted. +This patch avoids processing events before the app_started_sem is +posted to prevent this deadlock. 
+ +Buglink: https://bugs.launchpad.net/qemu/+bug/1847906 +Signed-off-by: Hikaru Nishida +Message-id: 20191015010734.85229-1-hikarupsp@gmail.com +Signed-off-by: Gerd Hoffmann +(cherry picked from commit dff742ad27efa474ec04accdbf422c9acfd3e30e) +Signed-off-by: Michael Roth +--- + ui/cocoa.m | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/ui/cocoa.m b/ui/cocoa.m +index c2984028c5..3026ead621 100644 +--- a/ui/cocoa.m ++++ b/ui/cocoa.m +@@ -132,6 +132,7 @@ NSArray * supportedImageFileTypes; + + static QemuSemaphore display_init_sem; + static QemuSemaphore app_started_sem; ++static bool allow_events; + + // Utility functions to run specified code block with iothread lock held + typedef void (^CodeBlock)(void); +@@ -727,6 +728,16 @@ QemuCocoaView *cocoaView; + + - (bool) handleEvent:(NSEvent *)event + { ++ if(!allow_events) { ++ /* ++ * Just let OSX have all events that arrive before ++ * applicationDidFinishLaunching. ++ * This avoids a deadlock on the iothread lock, which cocoa_display_init() ++ * will not drop until after the app_started_sem is posted. (In theory ++ * there should not be any such events, but OSX Catalina now emits some.) 
++ */ ++ return false; ++ } + return bool_with_iothread_lock(^{ + return [self handleEventLocked:event]; + }); +@@ -1154,6 +1165,7 @@ QemuCocoaView *cocoaView; + - (void)applicationDidFinishLaunching: (NSNotification *) note + { + COCOA_DEBUG("QemuCocoaAppController: applicationDidFinishLaunching\n"); ++ allow_events = true; + /* Tell cocoa_display_init to proceed */ + qemu_sem_post(&app_started_sem); + } +-- +2.23.0 diff --git a/util-hbitmap-strict-hbitmap_reset.patch b/util-hbitmap-strict-hbitmap_reset.patch new file mode 100644 index 0000000..b7f568f --- /dev/null +++ b/util-hbitmap-strict-hbitmap_reset.patch @@ -0,0 +1,77 @@ +From fcd7cba6acb7344aca70f5f8ec16626e817b35a5 Mon Sep 17 00:00:00 2001 +From: Vladimir Sementsov-Ogievskiy +Date: Tue, 6 Aug 2019 18:26:11 +0300 +Subject: [PATCH] util/hbitmap: strict hbitmap_reset + +hbitmap_reset has an unobvious property: it rounds requested region up. +It may provoke bugs, like in recently fixed write-blocking mode of +mirror: user calls reset on unaligned region, not keeping in mind that +there are possible unrelated dirty bytes, covered by rounded-up region +and information of this unrelated "dirtiness" will be lost. + +Make hbitmap_reset strict: assert that arguments are aligned, allowing +only one exception when @start + @count == hb->orig_size. It's needed +to comfort users of hbitmap_next_dirty_area, which cares about +hb->orig_size. + +Signed-off-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Max Reitz +Message-Id: <20190806152611.280389-1-vsementsov@virtuozzo.com> +[Maintainer edit: Max's suggestions from on-list. --js] +[Maintainer edit: Eric's suggestion for aligned macro. 
--js] +Signed-off-by: John Snow +(cherry picked from commit 48557b138383aaf69c2617ca9a88bfb394fc50ec) +*prereq for fed33bd175f663cc8c13f8a490a4f35a19756cfe +Signed-off-by: Michael Roth +--- + include/qemu/hbitmap.h | 5 +++++ + tests/test-hbitmap.c | 2 +- + util/hbitmap.c | 4 ++++ + 3 files changed, 10 insertions(+), 1 deletion(-) + +diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h +index 4afbe6292e..1bf944ca3d 100644 +--- a/include/qemu/hbitmap.h ++++ b/include/qemu/hbitmap.h +@@ -132,6 +132,11 @@ void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count); + * @count: Number of bits to reset. + * + * Reset a consecutive range of bits in an HBitmap. ++ * @start and @count must be aligned to bitmap granularity. The only exception ++ * is resetting the tail of the bitmap: @count may be equal to hb->orig_size - ++ * @start, in this case @count may be not aligned. The sum of @start + @count is ++ * allowed to be greater than hb->orig_size, but only if @start < hb->orig_size ++ * and @start + @count = ALIGN_UP(hb->orig_size, granularity). + */ + void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count); + +diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c +index 592d8219db..2be56d1597 100644 +--- a/tests/test-hbitmap.c ++++ b/tests/test-hbitmap.c +@@ -423,7 +423,7 @@ static void test_hbitmap_granularity(TestHBitmapData *data, + hbitmap_test_check(data, 0); + hbitmap_test_set(data, 0, 3); + g_assert_cmpint(hbitmap_count(data->hb), ==, 4); +- hbitmap_test_reset(data, 0, 1); ++ hbitmap_test_reset(data, 0, 2); + g_assert_cmpint(hbitmap_count(data->hb), ==, 2); + } + +diff --git a/util/hbitmap.c b/util/hbitmap.c +index bcc0acdc6a..71c6ba2c52 100644 +--- a/util/hbitmap.c ++++ b/util/hbitmap.c +@@ -476,6 +476,10 @@ void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count) + /* Compute range in the last layer. 
*/ + uint64_t first; + uint64_t last = start + count - 1; ++ uint64_t gran = 1ULL << hb->granularity; ++ ++ assert(QEMU_IS_ALIGNED(start, gran)); ++ assert(QEMU_IS_ALIGNED(count, gran) || (start + count == hb->orig_size)); + + trace_hbitmap_reset(hb, start, count, + start >> hb->granularity, last >> hb->granularity); +-- +2.23.0 diff --git a/util-iov-improve-qemu_iovec_is_zero.patch b/util-iov-improve-qemu_iovec_is_zero.patch new file mode 100644 index 0000000..0cca67b --- /dev/null +++ b/util-iov-improve-qemu_iovec_is_zero.patch @@ -0,0 +1,102 @@ +From b3b76fc643912d2c86b13caff30a1151f2958702 Mon Sep 17 00:00:00 2001 +From: Vladimir Sementsov-Ogievskiy +Date: Tue, 4 Jun 2019 19:15:04 +0300 +Subject: [PATCH] util/iov: improve qemu_iovec_is_zero + +We'll need to check a part of qiov soon, so implement it now. + +Optimization with align down to 4 * sizeof(long) is dropped due to: +1. It is strange: it aligns length of the buffer, but where is a + guarantee that buffer pointer is aligned itself? +2. buffer_is_zero() is a better place for optimizations and it has + them. 
+ +Signed-off-by: Vladimir Sementsov-Ogievskiy +Acked-by: Stefan Hajnoczi +Message-id: 20190604161514.262241-3-vsementsov@virtuozzo.com +Message-Id: <20190604161514.262241-3-vsementsov@virtuozzo.com> +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit f76889e7b947d896db51be8a4d9c941c2f70365a) +*prereq for 292d06b9 +Signed-off-by: Michael Roth +--- + block/io.c | 2 +- + include/qemu/iov.h | 2 +- + util/iov.c | 31 +++++++++++++++++++------------ + 3 files changed, 21 insertions(+), 14 deletions(-) + +diff --git a/block/io.c b/block/io.c +index 06305c6ea6..dccf687acc 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -1715,7 +1715,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, + + if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF && + !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes && +- qemu_iovec_is_zero(qiov)) { ++ qemu_iovec_is_zero(qiov, 0, qiov->size)) { + flags |= BDRV_REQ_ZERO_WRITE; + if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) { + flags |= BDRV_REQ_MAY_UNMAP; +diff --git a/include/qemu/iov.h b/include/qemu/iov.h +index f3787a0cf7..29957c8a72 100644 +--- a/include/qemu/iov.h ++++ b/include/qemu/iov.h +@@ -212,7 +212,7 @@ void qemu_iovec_concat(QEMUIOVector *dst, + size_t qemu_iovec_concat_iov(QEMUIOVector *dst, + struct iovec *src_iov, unsigned int src_cnt, + size_t soffset, size_t sbytes); +-bool qemu_iovec_is_zero(QEMUIOVector *qiov); ++bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t qiov_offeset, size_t bytes); + void qemu_iovec_destroy(QEMUIOVector *qiov); + void qemu_iovec_reset(QEMUIOVector *qiov); + size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset, +diff --git a/util/iov.c b/util/iov.c +index 366ff9cdd1..9ac0261853 100644 +--- a/util/iov.c ++++ b/util/iov.c +@@ -451,23 +451,30 @@ void qemu_iovec_init_extended( + } + + /* +- * Check if the contents of the iovecs are all zero ++ * Check if the contents of subrange of qiov data is all zeroes. 
+ */ +-bool qemu_iovec_is_zero(QEMUIOVector *qiov) ++bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t offset, size_t bytes) + { +- int i; +- for (i = 0; i < qiov->niov; i++) { +- size_t offs = QEMU_ALIGN_DOWN(qiov->iov[i].iov_len, 4 * sizeof(long)); +- uint8_t *ptr = qiov->iov[i].iov_base; +- if (offs && !buffer_is_zero(qiov->iov[i].iov_base, offs)) { ++ struct iovec *iov; ++ size_t current_offset; ++ ++ assert(offset + bytes <= qiov->size); ++ ++ iov = iov_skip_offset(qiov->iov, offset, &current_offset); ++ ++ while (bytes) { ++ uint8_t *base = (uint8_t *)iov->iov_base + current_offset; ++ size_t len = MIN(iov->iov_len - current_offset, bytes); ++ ++ if (!buffer_is_zero(base, len)) { + return false; + } +- for (; offs < qiov->iov[i].iov_len; offs++) { +- if (ptr[offs]) { +- return false; +- } +- } ++ ++ current_offset = 0; ++ bytes -= len; ++ iov++; + } ++ + return true; + } + +-- +2.23.0 diff --git a/util-iov-introduce-qemu_iovec_init_extended.patch b/util-iov-introduce-qemu_iovec_init_extended.patch new file mode 100644 index 0000000..0a488a6 --- /dev/null +++ b/util-iov-introduce-qemu_iovec_init_extended.patch @@ -0,0 +1,177 @@ +From cff024fe856ab36db3056ba4cb1d7cfa4c39795d Mon Sep 17 00:00:00 2001 +From: Vladimir Sementsov-Ogievskiy +Date: Tue, 4 Jun 2019 19:15:03 +0300 +Subject: [PATCH] util/iov: introduce qemu_iovec_init_extended + +Introduce new initialization API, to create requests with padding. Will +be used in the following patch. New API uses qemu_iovec_init_buf if +resulting io vector has only one element, to avoid extra allocations. +So, we need to update qemu_iovec_destroy to support destroying such +QIOVs.
+ +Signed-off-by: Vladimir Sementsov-Ogievskiy +Acked-by: Stefan Hajnoczi +Message-id: 20190604161514.262241-2-vsementsov@virtuozzo.com +Message-Id: <20190604161514.262241-2-vsementsov@virtuozzo.com> +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit d953169d4840f312d3b9a54952f4a7ccfcb3b311) +*prereq for 292d06b9 +Signed-off-by: Michael Roth +--- + include/qemu/iov.h | 7 +++ + util/iov.c | 112 +++++++++++++++++++++++++++++++++++++++++++-- + 2 files changed, 114 insertions(+), 5 deletions(-) + +diff --git a/include/qemu/iov.h b/include/qemu/iov.h +index 48b45987b7..f3787a0cf7 100644 +--- a/include/qemu/iov.h ++++ b/include/qemu/iov.h +@@ -199,6 +199,13 @@ static inline void *qemu_iovec_buf(QEMUIOVector *qiov) + + void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint); + void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); ++void qemu_iovec_init_extended( ++ QEMUIOVector *qiov, ++ void *head_buf, size_t head_len, ++ QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, ++ void *tail_buf, size_t tail_len); ++void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, ++ size_t offset, size_t len); + void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); + void qemu_iovec_concat(QEMUIOVector *dst, + QEMUIOVector *src, size_t soffset, size_t sbytes); +diff --git a/util/iov.c b/util/iov.c +index 74e6ca8ed7..366ff9cdd1 100644 +--- a/util/iov.c ++++ b/util/iov.c +@@ -353,6 +353,103 @@ void qemu_iovec_concat(QEMUIOVector *dst, + qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes); + } + ++/* ++ * qiov_find_iov ++ * ++ * Return pointer to iovec structure, where byte at @offset in original vector ++ * @iov exactly is. ++ * Set @remaining_offset to be offset inside that iovec to the same byte. 
++ */ ++static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset, ++ size_t *remaining_offset) ++{ ++ while (offset > 0 && offset >= iov->iov_len) { ++ offset -= iov->iov_len; ++ iov++; ++ } ++ *remaining_offset = offset; ++ ++ return iov; ++} ++ ++/* ++ * qiov_slice ++ * ++ * Find subarray of iovec's, containing requested range. @head would ++ * be offset in first iov (returned by the function), @tail would be ++ * count of extra bytes in last iovec (returned iov + @niov - 1). ++ */ ++static struct iovec *qiov_slice(QEMUIOVector *qiov, ++ size_t offset, size_t len, ++ size_t *head, size_t *tail, int *niov) ++{ ++ struct iovec *iov, *end_iov; ++ ++ assert(offset + len <= qiov->size); ++ ++ iov = iov_skip_offset(qiov->iov, offset, head); ++ end_iov = iov_skip_offset(iov, *head + len, tail); ++ ++ if (*tail > 0) { ++ assert(*tail < end_iov->iov_len); ++ *tail = end_iov->iov_len - *tail; ++ end_iov++; ++ } ++ ++ *niov = end_iov - iov; ++ ++ return iov; ++} ++ ++/* ++ * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov, ++ * and @tail_buf buffer into new qiov. 
++ */ ++void qemu_iovec_init_extended( ++ QEMUIOVector *qiov, ++ void *head_buf, size_t head_len, ++ QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, ++ void *tail_buf, size_t tail_len) ++{ ++ size_t mid_head, mid_tail; ++ int total_niov, mid_niov = 0; ++ struct iovec *p, *mid_iov; ++ ++ if (mid_len) { ++ mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len, ++ &mid_head, &mid_tail, &mid_niov); ++ } ++ ++ total_niov = !!head_len + mid_niov + !!tail_len; ++ if (total_niov == 1) { ++ qemu_iovec_init_buf(qiov, NULL, 0); ++ p = &qiov->local_iov; ++ } else { ++ qiov->niov = qiov->nalloc = total_niov; ++ qiov->size = head_len + mid_len + tail_len; ++ p = qiov->iov = g_new(struct iovec, qiov->niov); ++ } ++ ++ if (head_len) { ++ p->iov_base = head_buf; ++ p->iov_len = head_len; ++ p++; ++ } ++ ++ if (mid_len) { ++ memcpy(p, mid_iov, mid_niov * sizeof(*p)); ++ p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head; ++ p[0].iov_len -= mid_head; ++ p[mid_niov - 1].iov_len -= mid_tail; ++ p += mid_niov; ++ } ++ ++ if (tail_len) { ++ p->iov_base = tail_buf; ++ p->iov_len = tail_len; ++ } ++} ++ + /* + * Check if the contents of the iovecs are all zero + */ +@@ -374,14 +471,19 @@ bool qemu_iovec_is_zero(QEMUIOVector *qiov) + return true; + } + ++void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, ++ size_t offset, size_t len) ++{ ++ qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 0); ++} ++ + void qemu_iovec_destroy(QEMUIOVector *qiov) + { +- assert(qiov->nalloc != -1); ++ if (qiov->nalloc != -1) { ++ g_free(qiov->iov); ++ } + +- qemu_iovec_reset(qiov); +- g_free(qiov->iov); +- qiov->nalloc = 0; +- qiov->iov = NULL; ++ memset(qiov, 0, sizeof(*qiov)); + } + + void qemu_iovec_reset(QEMUIOVector *qiov) +-- +2.23.0 diff --git a/vhost-user-save-features-if-the-char-dev-is-closed.patch b/vhost-user-save-features-if-the-char-dev-is-closed.patch new file mode 100644 index 0000000..9a0d04f --- /dev/null +++ 
b/vhost-user-save-features-if-the-char-dev-is-closed.patch @@ -0,0 +1,42 @@ +From 7b404cae7fa2850d476c29258f03b8e77a5b4bd0 Mon Sep 17 00:00:00 2001 +From: Adrian Moreno +Date: Tue, 24 Sep 2019 18:20:44 +0200 +Subject: [PATCH] vhost-user: save features if the char dev is closed + +That way the state can be correctly restored when the device is opened +again. This might happen if the backend is restarted. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1738768 +Reported-by: Pei Zhang +Fixes: 6ab79a20af3a ("do not call vhost_net_cleanup() on running net from char user event") +Cc: ddstreet@canonical.com +Cc: Michael S. Tsirkin +Cc: qemu-stable@nongnu.org +Signed-off-by: Adrian Moreno +Message-Id: <20190924162044.11414-1-amorenoz@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit c6beefd674fff8d41b90365dfccad32e53a5abcb) +Signed-off-by: Michael Roth +--- + net/vhost-user.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/net/vhost-user.c b/net/vhost-user.c +index 51921de443..014199d600 100644 +--- a/net/vhost-user.c ++++ b/net/vhost-user.c +@@ -235,6 +235,10 @@ static void chr_closed_bh(void *opaque) + + s = DO_UPCAST(NetVhostUserState, nc, ncs[0]); + ++ if (s->vhost_net) { ++ s->acked_features = vhost_net_get_acked_features(s->vhost_net); ++ } ++ + qmp_set_link(name, false, &err); + + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, net_vhost_user_event, +-- +2.23.0 diff --git a/virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch b/virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch new file mode 100644 index 0000000..3c2a3f2 --- /dev/null +++ b/virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch @@ -0,0 +1,80 @@ +From 01be50603be4f17af4318a7a3fe58dcc6dab1b31 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Fri, 16 Aug 2019 19:15:03 +0200 +Subject: [PATCH] virtio-blk: Cancel the pending BH when the dataplane is reset 
+MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When 'system_reset' is called, the main loop clears the memory +region cache before the BH has a chance to execute. Later when +the deferred functions are called, some assumptions that were +made when scheduling them are no longer true when they actually +execute. + +This is what happens using a virtio-blk device (fresh RHEL7.8 install): + + $ (sleep 12.3; echo system_reset; sleep 12.3; echo system_reset; sleep 1; echo q) \ + | qemu-system-x86_64 -m 4G -smp 8 -boot menu=on \ + -device virtio-blk-pci,id=image1,drive=drive_image1 \ + -drive file=/var/lib/libvirt/images/rhel78.qcow2,if=none,id=drive_image1,format=qcow2,cache=none \ + -device virtio-net-pci,netdev=net0,id=nic0,mac=52:54:00:c4:e7:84 \ + -netdev tap,id=net0,script=/bin/true,downscript=/bin/true,vhost=on \ + -monitor stdio -serial null -nographic + (qemu) system_reset + (qemu) system_reset + (qemu) qemu-system-x86_64: hw/virtio/virtio.c:225: vring_get_region_caches: Assertion `caches != NULL' failed. 
+ Aborted + + (gdb) bt + Thread 1 (Thread 0x7f109c17b680 (LWP 10939)): + #0 0x00005604083296d1 in vring_get_region_caches (vq=0x56040a24bdd0) at hw/virtio/virtio.c:227 + #1 0x000056040832972b in vring_avail_flags (vq=0x56040a24bdd0) at hw/virtio/virtio.c:235 + #2 0x000056040832d13d in virtio_should_notify (vdev=0x56040a240630, vq=0x56040a24bdd0) at hw/virtio/virtio.c:1648 + #3 0x000056040832d1f8 in virtio_notify_irqfd (vdev=0x56040a240630, vq=0x56040a24bdd0) at hw/virtio/virtio.c:1662 + #4 0x00005604082d213d in notify_guest_bh (opaque=0x56040a243ec0) at hw/block/dataplane/virtio-blk.c:75 + #5 0x000056040883dc35 in aio_bh_call (bh=0x56040a243f10) at util/async.c:90 + #6 0x000056040883dccd in aio_bh_poll (ctx=0x560409161980) at util/async.c:118 + #7 0x0000560408842af7 in aio_dispatch (ctx=0x560409161980) at util/aio-posix.c:460 + #8 0x000056040883e068 in aio_ctx_dispatch (source=0x560409161980, callback=0x0, user_data=0x0) at util/async.c:261 + #9 0x00007f10a8fca06d in g_main_context_dispatch () at /lib64/libglib-2.0.so.0 + #10 0x0000560408841445 in glib_pollfds_poll () at util/main-loop.c:215 + #11 0x00005604088414bf in os_host_main_loop_wait (timeout=0) at util/main-loop.c:238 + #12 0x00005604088415c4 in main_loop_wait (nonblocking=0) at util/main-loop.c:514 + #13 0x0000560408416b1e in main_loop () at vl.c:1923 + #14 0x000056040841e0e8 in main (argc=20, argv=0x7ffc2c3f9c58, envp=0x7ffc2c3f9d00) at vl.c:4578 + +Fix this by cancelling the BH when the virtio dataplane is stopped. + +[This version of the patch was modified as discussed with Philippe on +the mailing list thread. 
+--Stefan] + +Reported-by: Yihuang Yu +Suggested-by: Stefan Hajnoczi +Fixes: https://bugs.launchpad.net/qemu/+bug/1839428 +Signed-off-by: Philippe Mathieu-Daudé +Message-Id: <20190816171503.24761-1-philmd@redhat.com> +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit ebb6ff25cd888a52a64a9adc3692541c6d1d9a42) +Signed-off-by: Michael Roth +--- + hw/block/dataplane/virtio-blk.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c +index 158c78f852..5fea76df85 100644 +--- a/hw/block/dataplane/virtio-blk.c ++++ b/hw/block/dataplane/virtio-blk.c +@@ -297,6 +297,9 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); + } + ++ qemu_bh_cancel(s->bh); ++ notify_guest_bh(s); /* final chance to notify guest */ ++ + /* Clean up guest notifier (irq) */ + k->set_guest_notifiers(qbus->parent, nvqs, false); + +-- +2.23.0 diff --git a/virtio-net-prevent-offloads-reset-on-migration.patch b/virtio-net-prevent-offloads-reset-on-migration.patch new file mode 100644 index 0000000..ab8fbe2 --- /dev/null +++ b/virtio-net-prevent-offloads-reset-on-migration.patch @@ -0,0 +1,122 @@ +From 4887acf574a573137660aa98d9d422ece0a41a5a Mon Sep 17 00:00:00 2001 +From: Mikhail Sennikovsky +Date: Fri, 11 Oct 2019 15:58:04 +0200 +Subject: [PATCH] virtio-net: prevent offloads reset on migration + +Currently offloads disabled by guest via the VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET +command are not preserved on VM migration. +Instead all offloads reported by guest features (via VIRTIO_PCI_GUEST_FEATURES) +get enabled. 
+What happens is: first the VirtIONet::curr_guest_offloads gets restored and offloads +are getting set correctly: + + #0 qemu_set_offload (nc=0x555556a11400, csum=1, tso4=0, tso6=0, ecn=0, ufo=0) at net/net.c:474 + #1 virtio_net_apply_guest_offloads (n=0x555557701ca0) at hw/net/virtio-net.c:720 + #2 virtio_net_post_load_device (opaque=0x555557701ca0, version_id=11) at hw/net/virtio-net.c:2334 + #3 vmstate_load_state (f=0x5555569dc010, vmsd=0x555556577c80 , opaque=0x555557701ca0, version_id=11) + at migration/vmstate.c:168 + #4 virtio_load (vdev=0x555557701ca0, f=0x5555569dc010, version_id=11) at hw/virtio/virtio.c:2197 + #5 virtio_device_get (f=0x5555569dc010, opaque=0x555557701ca0, size=0, field=0x55555668cd00 <__compound_literal.5>) at hw/virtio/virtio.c:2036 + #6 vmstate_load_state (f=0x5555569dc010, vmsd=0x555556577ce0 , opaque=0x555557701ca0, version_id=11) at migration/vmstate.c:143 + #7 vmstate_load (f=0x5555569dc010, se=0x5555578189e0) at migration/savevm.c:829 + #8 qemu_loadvm_section_start_full (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2211 + #9 qemu_loadvm_state_main (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2395 + #10 qemu_loadvm_state (f=0x5555569dc010) at migration/savevm.c:2467 + #11 process_incoming_migration_co (opaque=0x0) at migration/migration.c:449 + +However later on the features are getting restored, and offloads get reset to +everything supported by features: + + #0 qemu_set_offload (nc=0x555556a11400, csum=1, tso4=1, tso6=1, ecn=0, ufo=0) at net/net.c:474 + #1 virtio_net_apply_guest_offloads (n=0x555557701ca0) at hw/net/virtio-net.c:720 + #2 virtio_net_set_features (vdev=0x555557701ca0, features=5104441767) at hw/net/virtio-net.c:773 + #3 virtio_set_features_nocheck (vdev=0x555557701ca0, val=5104441767) at hw/virtio/virtio.c:2052 + #4 virtio_load (vdev=0x555557701ca0, f=0x5555569dc010, version_id=11) at hw/virtio/virtio.c:2220 + #5 virtio_device_get (f=0x5555569dc010, opaque=0x555557701ca0, size=0, 
field=0x55555668cd00 <__compound_literal.5>) at hw/virtio/virtio.c:2036 + #6 vmstate_load_state (f=0x5555569dc010, vmsd=0x555556577ce0 , opaque=0x555557701ca0, version_id=11) at migration/vmstate.c:143 + #7 vmstate_load (f=0x5555569dc010, se=0x5555578189e0) at migration/savevm.c:829 + #8 qemu_loadvm_section_start_full (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2211 + #9 qemu_loadvm_state_main (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2395 + #10 qemu_loadvm_state (f=0x5555569dc010) at migration/savevm.c:2467 + #11 process_incoming_migration_co (opaque=0x0) at migration/migration.c:449 + +Fix this by preserving the state in saved_guest_offloads field and +pushing out offload initialization to the new post load hook. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Mikhail Sennikovsky +Signed-off-by: Jason Wang +(cherry picked from commit 7788c3f2e21e35902d45809b236791383bbb613e) +Signed-off-by: Michael Roth +--- + hw/net/virtio-net.c | 27 ++++++++++++++++++++++++--- + include/hw/virtio/virtio-net.h | 2 ++ + 2 files changed, 26 insertions(+), 3 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index b9e1cd71cf..6adb0fe252 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -2330,9 +2330,13 @@ static int virtio_net_post_load_device(void *opaque, int version_id) + n->curr_guest_offloads = virtio_net_supported_guest_offloads(n); + } + +- if (peer_has_vnet_hdr(n)) { +- virtio_net_apply_guest_offloads(n); +- } ++ /* ++ * curr_guest_offloads will be later overwritten by the ++ * virtio_set_features_nocheck call done from the virtio_load. ++ * Here we make sure it is preserved and restored accordingly ++ * in the virtio_net_post_load_virtio callback. 
++ */ ++ n->saved_guest_offloads = n->curr_guest_offloads; + + virtio_net_set_queues(n); + +@@ -2367,6 +2371,22 @@ static int virtio_net_post_load_device(void *opaque, int version_id) + return 0; + } + ++static int virtio_net_post_load_virtio(VirtIODevice *vdev) ++{ ++ VirtIONet *n = VIRTIO_NET(vdev); ++ /* ++ * The actual needed state is now in saved_guest_offloads, ++ * see virtio_net_post_load_device for detail. ++ * Restore it back and apply the desired offloads. ++ */ ++ n->curr_guest_offloads = n->saved_guest_offloads; ++ if (peer_has_vnet_hdr(n)) { ++ virtio_net_apply_guest_offloads(n); ++ } ++ ++ return 0; ++} ++ + /* tx_waiting field of a VirtIONetQueue */ + static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = { + .name = "virtio-net-queue-tx_waiting", +@@ -2909,6 +2929,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data) + vdc->guest_notifier_mask = virtio_net_guest_notifier_mask; + vdc->guest_notifier_pending = virtio_net_guest_notifier_pending; + vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO); ++ vdc->post_load = virtio_net_post_load_virtio; + vdc->vmsd = &vmstate_virtio_net_device; + } + +diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h +index b96f0c643f..07a9319f4b 100644 +--- a/include/hw/virtio/virtio-net.h ++++ b/include/hw/virtio/virtio-net.h +@@ -182,6 +182,8 @@ struct VirtIONet { + char *netclient_name; + char *netclient_type; + uint64_t curr_guest_offloads; ++ /* used on saved state restore phase to preserve the curr_guest_offloads */ ++ uint64_t saved_guest_offloads; + AnnounceTimer announce_timer; + bool needs_vnet_hdr_swap; + bool mtu_bypass_backend; +-- +2.23.0 diff --git a/virtio-new-post_load-hook.patch b/virtio-new-post_load-hook.patch new file mode 100644 index 0000000..974f286 --- /dev/null +++ b/virtio-new-post_load-hook.patch @@ -0,0 +1,63 @@ +From 8010d3fce008dd13f155bc0babfe236ea44a2712 Mon Sep 17 00:00:00 2001 +From: "Michael S. 
Tsirkin" +Date: Fri, 11 Oct 2019 15:58:03 +0200 +Subject: [PATCH] virtio: new post_load hook + +Post load hook in virtio vmsd is called early while device is processed, +and when VirtIODevice core isn't fully initialized. Most device +specific code isn't ready to deal with a device in such state, and +behaves weirdly. + +Add a new post_load hook in a device class instead. Devices should use +this unless they specifically want to verify the migration stream as +it's processed, e.g. for bounds checking. + +Cc: qemu-stable@nongnu.org +Suggested-by: "Dr. David Alan Gilbert" +Cc: Mikhail Sennikovsky +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Jason Wang +(cherry picked from commit 1dd713837cac8ec5a97d3b8492d72ce5ac94803c) +Signed-off-by: Michael Roth +--- + hw/virtio/virtio.c | 7 +++++++ + include/hw/virtio/virtio.h | 6 ++++++ + 2 files changed, 13 insertions(+) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index a94ea18a9c..7c3822c3a0 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2287,6 +2287,13 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) + } + rcu_read_unlock(); + ++ if (vdc->post_load) { ++ ret = vdc->post_load(vdev); ++ if (ret) { ++ return ret; ++ } ++ } ++ + return 0; + } + +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index b189788cb2..f9f62370e9 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -158,6 +158,12 @@ typedef struct VirtioDeviceClass { + */ + void (*save)(VirtIODevice *vdev, QEMUFile *f); + int (*load)(VirtIODevice *vdev, QEMUFile *f, int version_id); ++ /* Post load hook in vmsd is called early while device is processed, and ++ * when VirtIODevice isn't fully initialized. Devices should use this instead, ++ * unless they specifically want to verify the migration stream as it's ++ * processed, e.g. for bounds checking. 
++ */ ++ int (*post_load)(VirtIODevice *vdev); + const VMStateDescription *vmsd; + } VirtioDeviceClass; + +-- +2.23.0 diff --git a/vpc-Return-0-from-vpc_co_create-on-success.patch b/vpc-Return-0-from-vpc_co_create-on-success.patch new file mode 100644 index 0000000..46fbd90 --- /dev/null +++ b/vpc-Return-0-from-vpc_co_create-on-success.patch @@ -0,0 +1,49 @@ +From 97c478c355fee96eb2b740313f50561e69b6f305 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Mon, 2 Sep 2019 21:33:16 +0200 +Subject: [PATCH] vpc: Return 0 from vpc_co_create() on success + +blockdev_create_run() directly uses .bdrv_co_create()'s return value as +the job's return value. Jobs must return 0 on success, not just any +nonnegative value. Therefore, using blockdev-create for VPC images may +currently fail as the vpc driver may return a positive integer. + +Because there is no point in returning a positive integer anywhere in +the block layer (all non-negative integers are generally treated as +complete success), we probably do not want to add more such cases. +Therefore, fix this problem by making the vpc driver always return 0 in +case of success. 
+ +Suggested-by: Kevin Wolf +Cc: qemu-stable@nongnu.org +Signed-off-by: Max Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit 1a37e3124407b5a145d44478d3ecbdb89c63789f) +Signed-off-by: Michael Roth +--- + block/vpc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/block/vpc.c b/block/vpc.c +index d4776ee8a5..3a88e28e2b 100644 +--- a/block/vpc.c ++++ b/block/vpc.c +@@ -885,6 +885,7 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf, + goto fail; + } + ++ ret = 0; + fail: + return ret; + } +@@ -908,7 +909,7 @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf, + return ret; + } + +- return ret; ++ return 0; + } + + static int calculate_rounded_image_size(BlockdevCreateOptionsVpc *vpc_opts, +-- +2.23.0 diff --git a/x86-do-not-advertise-die-id-in-query-hotpluggbale-cp.patch b/x86-do-not-advertise-die-id-in-query-hotpluggbale-cp.patch new file mode 100644 index 0000000..fc17f48 --- /dev/null +++ b/x86-do-not-advertise-die-id-in-query-hotpluggbale-cp.patch @@ -0,0 +1,60 @@ +From 725dfa851f8e1de8653f41a4bd38c7f98757eb40 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Mon, 2 Sep 2019 08:02:22 -0400 +Subject: [PATCH] x86: do not advertise die-id in query-hotpluggbale-cpus if + '-smp dies' is not set + +Commit 176d2cda0 (i386/cpu: Consolidate die-id validity in smp context) added +new 'die-id' topology property to CPUs and exposed it via QMP command +query-hotpluggable-cpus, which broke -device/device_add cpu-foo for existing +users that do not support die-id/dies yet. That would be fine if it happened +to new machine type only but it also happened to old machine types, +which breaks migration from old QEMU to the new one, for example the following CLI: + + OLD-QEMU -M pc-i440fx-4.0 -smp 1,max_cpus=2 \ + -device qemu64-x86_64-cpu,socket-id=1,core-id=0,thread-id +is not able to start with new QEMU, complaining about invalid die-id. 
+ +After discovering regression, the patch + "pc: Don't make die-id mandatory unless necessary" +makes die-id optional so old CLI would work. + +However it's not enough as new QEMU still exposes die-id via query-hotpluggable-cpus +QMP command, so the users that started old machine type on new QEMU, using all +properties (including die-id) received from QMP command (as required), won't be +able to start old QEMU using the same properties since it doesn't support die-id. + +Fix it by hiding die-id in query-hotpluggable-cpus for all machine types in case +'-smp dies' is not provided on CLI or '-smp dies=1', in which case smp_dies == 1 +and APIC ID is calculated in default way (as it was before DIE support) so we won't +need compat code as in both cases the topology provided to guest via CPUID is the same. + +Signed-off-by: Igor Mammedov +Message-Id: <20190902120222.6179-1-imammedo@redhat.com> +Reviewed-by: Eduardo Habkost +Signed-off-by: Eduardo Habkost +(cherry picked from commit c6c1bb89fb46f3b88f832e654cf5a6f7941aac51) +Signed-off-by: Michael Roth +--- + hw/i386/pc.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 947f81070f..d011733ff7 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -2887,8 +2887,10 @@ static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms) + ms->smp.threads, &topo); + ms->possible_cpus->cpus[i].props.has_socket_id = true; + ms->possible_cpus->cpus[i].props.socket_id = topo.pkg_id; +- ms->possible_cpus->cpus[i].props.has_die_id = true; +- ms->possible_cpus->cpus[i].props.die_id = topo.die_id; ++ if (pcms->smp_dies > 1) { ++ ms->possible_cpus->cpus[i].props.has_die_id = true; ++ ms->possible_cpus->cpus[i].props.die_id = topo.die_id; ++ } + ms->possible_cpus->cpus[i].props.has_core_id = true; + ms->possible_cpus->cpus[i].props.core_id = topo.core_id; + ms->possible_cpus->cpus[i].props.has_thread_id = true; +-- +2.23.0