Backport: backport from upstream stable v4.1.1
This patch backports the bugfix patch series from QEMU upstream v4.1.1.
Signed-off-by: Ying Fang <fangying1@huawei.com>
This commit is contained in:
parent
cbfda6760e
commit
1e4b6553e3
88
Revert-ide-ahci-Check-for-ECANCELED-in-aio-callbacks.patch
Normal file
88
Revert-ide-ahci-Check-for-ECANCELED-in-aio-callbacks.patch
Normal file
@@ -0,0 +1,88 @@
|
||||
From 73a5bf472921068e6db10e7e325b7ac46f111834 Mon Sep 17 00:00:00 2001
|
||||
From: John Snow <jsnow@redhat.com>
|
||||
Date: Mon, 29 Jul 2019 18:36:05 -0400
|
||||
Subject: [PATCH] Revert "ide/ahci: Check for -ECANCELED in aio callbacks"
|
||||
|
||||
This reverts commit 0d910cfeaf2076b116b4517166d5deb0fea76394.
|
||||
|
||||
It's not correct to just ignore an error code in a callback; we need to
|
||||
handle that error and possibly report failure to the guest so that they
|
||||
don't wait indefinitely for an operation that will now never finish.
|
||||
|
||||
This ought to help cases reported by Nutanix where iSCSI returns a
|
||||
legitimate -ECANCELED for certain operations which should be propagated
|
||||
normally.
|
||||
|
||||
Reported-by: Shaju Abraham <shaju.abraham@nutanix.com>
|
||||
Signed-off-by: John Snow <jsnow@redhat.com>
|
||||
Message-id: 20190729223605.7163-1-jsnow@redhat.com
|
||||
Signed-off-by: John Snow <jsnow@redhat.com>
|
||||
(cherry picked from commit 8ec41c4265714255d5a138f8b538faf3583dcff6)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
hw/ide/ahci.c | 3 ---
|
||||
hw/ide/core.c | 14 --------------
|
||||
2 files changed, 17 deletions(-)
|
||||
|
||||
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
|
||||
index 00ba422a48..6aaf66534a 100644
|
||||
--- a/hw/ide/ahci.c
|
||||
+++ b/hw/ide/ahci.c
|
||||
@@ -1023,9 +1023,6 @@ static void ncq_cb(void *opaque, int ret)
|
||||
IDEState *ide_state = &ncq_tfs->drive->port.ifs[0];
|
||||
|
||||
ncq_tfs->aiocb = NULL;
|
||||
- if (ret == -ECANCELED) {
|
||||
- return;
|
||||
- }
|
||||
|
||||
if (ret < 0) {
|
||||
bool is_read = ncq_tfs->cmd == READ_FPDMA_QUEUED;
|
||||
diff --git a/hw/ide/core.c b/hw/ide/core.c
|
||||
index 6afadf894f..8e1624f7ce 100644
|
||||
--- a/hw/ide/core.c
|
||||
+++ b/hw/ide/core.c
|
||||
@@ -722,9 +722,6 @@ static void ide_sector_read_cb(void *opaque, int ret)
|
||||
s->pio_aiocb = NULL;
|
||||
s->status &= ~BUSY_STAT;
|
||||
|
||||
- if (ret == -ECANCELED) {
|
||||
- return;
|
||||
- }
|
||||
if (ret != 0) {
|
||||
if (ide_handle_rw_error(s, -ret, IDE_RETRY_PIO |
|
||||
IDE_RETRY_READ)) {
|
||||
@@ -840,10 +837,6 @@ static void ide_dma_cb(void *opaque, int ret)
|
||||
uint64_t offset;
|
||||
bool stay_active = false;
|
||||
|
||||
- if (ret == -ECANCELED) {
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
if (ret == -EINVAL) {
|
||||
ide_dma_error(s);
|
||||
return;
|
||||
@@ -975,10 +968,6 @@ static void ide_sector_write_cb(void *opaque, int ret)
|
||||
IDEState *s = opaque;
|
||||
int n;
|
||||
|
||||
- if (ret == -ECANCELED) {
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
s->pio_aiocb = NULL;
|
||||
s->status &= ~BUSY_STAT;
|
||||
|
||||
@@ -1058,9 +1047,6 @@ static void ide_flush_cb(void *opaque, int ret)
|
||||
|
||||
s->pio_aiocb = NULL;
|
||||
|
||||
- if (ret == -ECANCELED) {
|
||||
- return;
|
||||
- }
|
||||
if (ret < 0) {
|
||||
/* XXX: What sector number to set here? */
|
||||
if (ide_handle_rw_error(s, -ret, IDE_RETRY_FLUSH)) {
|
||||
--
|
||||
2.23.0
|
||||
59
block-Add-bdrv_co_get_self_request.patch
Normal file
59
block-Add-bdrv_co_get_self_request.patch
Normal file
@@ -0,0 +1,59 @@
|
||||
From d9b88f7e0d56feb4d7daa2506e2756fc48e975a1 Mon Sep 17 00:00:00 2001
|
||||
From: Max Reitz <mreitz@redhat.com>
|
||||
Date: Fri, 1 Nov 2019 16:25:09 +0100
|
||||
Subject: [PATCH] block: Add bdrv_co_get_self_request()
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Max Reitz <mreitz@redhat.com>
|
||||
Message-id: 20191101152510.11719-3-mreitz@redhat.com
|
||||
Signed-off-by: Max Reitz <mreitz@redhat.com>
|
||||
(cherry picked from commit c28107e9e55b11cd35cf3dc2505e3e69d10dcf13)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
block/io.c | 18 ++++++++++++++++++
|
||||
include/block/block_int.h | 1 +
|
||||
2 files changed, 19 insertions(+)
|
||||
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index d4ceaaa2ce..65b5102714 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -721,6 +721,24 @@ static bool is_request_serialising_and_aligned(BdrvTrackedRequest *req)
|
||||
(req->bytes == req->overlap_bytes);
|
||||
}
|
||||
|
||||
+/**
|
||||
+ * Return the tracked request on @bs for the current coroutine, or
|
||||
+ * NULL if there is none.
|
||||
+ */
|
||||
+BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs)
|
||||
+{
|
||||
+ BdrvTrackedRequest *req;
|
||||
+ Coroutine *self = qemu_coroutine_self();
|
||||
+
|
||||
+ QLIST_FOREACH(req, &bs->tracked_requests, list) {
|
||||
+ if (req->co == self) {
|
||||
+ return req;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return NULL;
|
||||
+}
|
||||
+
|
||||
/**
|
||||
* Round a region to cluster boundaries
|
||||
*/
|
||||
diff --git a/include/block/block_int.h b/include/block/block_int.h
|
||||
index 4465b02242..05ee6b4866 100644
|
||||
--- a/include/block/block_int.h
|
||||
+++ b/include/block/block_int.h
|
||||
@@ -964,6 +964,7 @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
|
||||
|
||||
bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self);
|
||||
void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align);
|
||||
+BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs);
|
||||
|
||||
int get_tmp_filename(char *filename, int size);
|
||||
BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
|
||||
--
|
||||
2.23.0
|
||||
131
block-Make-wait-mark-serialising-requests-public.patch
Normal file
131
block-Make-wait-mark-serialising-requests-public.patch
Normal file
@@ -0,0 +1,131 @@
|
||||
From 590cff8230749794ba09b38f3ea4eb6b0f2f73b5 Mon Sep 17 00:00:00 2001
|
||||
From: Max Reitz <mreitz@redhat.com>
|
||||
Date: Fri, 1 Nov 2019 16:25:08 +0100
|
||||
Subject: [PATCH] block: Make wait/mark serialising requests public
|
||||
|
||||
Make both bdrv_mark_request_serialising() and
|
||||
bdrv_wait_serialising_requests() public so they can be used from block
|
||||
drivers.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Max Reitz <mreitz@redhat.com>
|
||||
Message-id: 20191101152510.11719-2-mreitz@redhat.com
|
||||
Signed-off-by: Max Reitz <mreitz@redhat.com>
|
||||
(cherry picked from commit 304d9d7f034ff7f5e1e66a65b7f720f63a72c57e)
|
||||
Conflicts:
|
||||
block/io.c
|
||||
*drop context dependency on 1acc3466a2
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
block/io.c | 24 ++++++++++++------------
|
||||
include/block/block_int.h | 3 +++
|
||||
2 files changed, 15 insertions(+), 12 deletions(-)
|
||||
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index 07d2d825c3..d4ceaaa2ce 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -694,7 +694,7 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
|
||||
qemu_co_mutex_unlock(&bs->reqs_lock);
|
||||
}
|
||||
|
||||
-static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
|
||||
+void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
|
||||
{
|
||||
int64_t overlap_offset = req->offset & ~(align - 1);
|
||||
uint64_t overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
|
||||
@@ -784,7 +784,7 @@ void bdrv_dec_in_flight(BlockDriverState *bs)
|
||||
bdrv_wakeup(bs);
|
||||
}
|
||||
|
||||
-static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
|
||||
+bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
|
||||
{
|
||||
BlockDriverState *bs = self->bs;
|
||||
BdrvTrackedRequest *req;
|
||||
@@ -1340,14 +1340,14 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
|
||||
* with each other for the same cluster. For example, in copy-on-read
|
||||
* it ensures that the CoR read and write operations are atomic and
|
||||
* guest writes cannot interleave between them. */
|
||||
- mark_request_serialising(req, bdrv_get_cluster_size(bs));
|
||||
+ bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
|
||||
}
|
||||
|
||||
/* BDRV_REQ_SERIALISING is only for write operation */
|
||||
assert(!(flags & BDRV_REQ_SERIALISING));
|
||||
|
||||
if (!(flags & BDRV_REQ_NO_SERIALISING)) {
|
||||
- wait_serialising_requests(req);
|
||||
+ bdrv_wait_serialising_requests(req);
|
||||
}
|
||||
|
||||
if (flags & BDRV_REQ_COPY_ON_READ) {
|
||||
@@ -1736,10 +1736,10 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
|
||||
assert(!(flags & ~BDRV_REQ_MASK));
|
||||
|
||||
if (flags & BDRV_REQ_SERIALISING) {
|
||||
- mark_request_serialising(req, bdrv_get_cluster_size(bs));
|
||||
+ bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
|
||||
}
|
||||
|
||||
- waited = wait_serialising_requests(req);
|
||||
+ waited = bdrv_wait_serialising_requests(req);
|
||||
|
||||
assert(!waited || !req->serialising ||
|
||||
is_request_serialising_and_aligned(req));
|
||||
@@ -1905,8 +1905,8 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
|
||||
|
||||
padding = bdrv_init_padding(bs, offset, bytes, &pad);
|
||||
if (padding) {
|
||||
- mark_request_serialising(req, align);
|
||||
- wait_serialising_requests(req);
|
||||
+ bdrv_mark_request_serialising(req, align);
|
||||
+ bdrv_wait_serialising_requests(req);
|
||||
|
||||
bdrv_padding_rmw_read(child, req, &pad, true);
|
||||
|
||||
@@ -1993,8 +1993,8 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
|
||||
}
|
||||
|
||||
if (bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad)) {
|
||||
- mark_request_serialising(&req, align);
|
||||
- wait_serialising_requests(&req);
|
||||
+ bdrv_mark_request_serialising(&req, align);
|
||||
+ bdrv_wait_serialising_requests(&req);
|
||||
bdrv_padding_rmw_read(child, &req, &pad, false);
|
||||
}
|
||||
|
||||
@@ -3078,7 +3078,7 @@ static int coroutine_fn bdrv_co_copy_range_internal(
|
||||
/* BDRV_REQ_SERIALISING is only for write operation */
|
||||
assert(!(read_flags & BDRV_REQ_SERIALISING));
|
||||
if (!(read_flags & BDRV_REQ_NO_SERIALISING)) {
|
||||
- wait_serialising_requests(&req);
|
||||
+ bdrv_wait_serialising_requests(&req);
|
||||
}
|
||||
|
||||
ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
|
||||
@@ -3205,7 +3205,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset,
|
||||
* new area, we need to make sure that no write requests are made to it
|
||||
* concurrently or they might be overwritten by preallocation. */
|
||||
if (new_bytes) {
|
||||
- mark_request_serialising(&req, 1);
|
||||
+ bdrv_mark_request_serialising(&req, 1);
|
||||
}
|
||||
if (bs->read_only) {
|
||||
error_setg(errp, "Image is read-only");
|
||||
diff --git a/include/block/block_int.h b/include/block/block_int.h
|
||||
index 3aa1e832a8..4465b02242 100644
|
||||
--- a/include/block/block_int.h
|
||||
+++ b/include/block/block_int.h
|
||||
@@ -962,6 +962,9 @@ extern unsigned int bdrv_drain_all_count;
|
||||
void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
|
||||
void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
|
||||
|
||||
+bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self);
|
||||
+void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align);
|
||||
+
|
||||
int get_tmp_filename(char *filename, int size);
|
||||
BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
|
||||
const char *filename);
|
||||
--
|
||||
2.23.0
|
||||
95
block-create-Do-not-abort-if-a-block-driver-is-not-a.patch
Normal file
95
block-create-Do-not-abort-if-a-block-driver-is-not-a.patch
Normal file
@@ -0,0 +1,95 @@
|
||||
From 088f1e8fd9e790bc5766bd43af134230abcff6dd Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <philmd@redhat.com>
|
||||
Date: Thu, 12 Sep 2019 00:08:49 +0200
|
||||
Subject: [PATCH] block/create: Do not abort if a block driver is not available
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
The 'blockdev-create' QMP command was introduced as experimental
|
||||
feature in commit b0292b851b8, using the assert() debug call.
|
||||
It got promoted to 'stable' command in 3fb588a0f2c, but the
|
||||
assert call was not removed.
|
||||
|
||||
Some block drivers are optional, and bdrv_find_format() might
|
||||
return a NULL value, triggering the assertion.
|
||||
|
||||
Stable code is not expected to abort, so return an error instead.
|
||||
|
||||
This is easily reproducible when libnfs is not installed:
|
||||
|
||||
./configure
|
||||
[...]
|
||||
module support no
|
||||
Block whitelist (rw)
|
||||
Block whitelist (ro)
|
||||
libiscsi support yes
|
||||
libnfs support no
|
||||
[...]
|
||||
|
||||
Start QEMU:
|
||||
|
||||
$ qemu-system-x86_64 -S -qmp unix:/tmp/qemu.qmp,server,nowait
|
||||
|
||||
Send the 'blockdev-create' with the 'nfs' driver:
|
||||
|
||||
$ ( cat << 'EOF'
|
||||
{'execute': 'qmp_capabilities'}
|
||||
{'execute': 'blockdev-create', 'arguments': {'job-id': 'x', 'options': {'size': 0, 'driver': 'nfs', 'location': {'path': '/', 'server': {'host': '::1', 'type': 'inet'}}}}, 'id': 'x'}
|
||||
EOF
|
||||
) | socat STDIO UNIX:/tmp/qemu.qmp
|
||||
{"QMP": {"version": {"qemu": {"micro": 50, "minor": 1, "major": 4}, "package": "v4.1.0-733-g89ea03a7dc"}, "capabilities": ["oob"]}}
|
||||
{"return": {}}
|
||||
|
||||
QEMU crashes:
|
||||
|
||||
$ gdb qemu-system-x86_64 core
|
||||
Program received signal SIGSEGV, Segmentation fault.
|
||||
(gdb) bt
|
||||
#0 0x00007ffff510957f in raise () at /lib64/libc.so.6
|
||||
#1 0x00007ffff50f3895 in abort () at /lib64/libc.so.6
|
||||
#2 0x00007ffff50f3769 in _nl_load_domain.cold.0 () at /lib64/libc.so.6
|
||||
#3 0x00007ffff5101a26 in .annobin_assert.c_end () at /lib64/libc.so.6
|
||||
#4 0x0000555555d7e1f1 in qmp_blockdev_create (job_id=0x555556baee40 "x", options=0x555557666610, errp=0x7fffffffc770) at block/create.c:69
|
||||
#5 0x0000555555c96b52 in qmp_marshal_blockdev_create (args=0x7fffdc003830, ret=0x7fffffffc7f8, errp=0x7fffffffc7f0) at qapi/qapi-commands-block-core.c:1314
|
||||
#6 0x0000555555deb0a0 in do_qmp_dispatch (cmds=0x55555645de70 <qmp_commands>, request=0x7fffdc005c70, allow_oob=false, errp=0x7fffffffc898) at qapi/qmp-dispatch.c:131
|
||||
#7 0x0000555555deb2a1 in qmp_dispatch (cmds=0x55555645de70 <qmp_commands>, request=0x7fffdc005c70, allow_oob=false) at qapi/qmp-dispatch.c:174
|
||||
|
||||
With this patch applied, QEMU returns a QMP error:
|
||||
|
||||
{'execute': 'blockdev-create', 'arguments': {'job-id': 'x', 'options': {'size': 0, 'driver': 'nfs', 'location': {'path': '/', 'server': {'host': '::1', 'type': 'inet'}}}}, 'id': 'x'}
|
||||
{"id": "x", "error": {"class": "GenericError", "desc": "Block driver 'nfs' not found or not supported"}}
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Reported-by: Xu Tian <xutian@redhat.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: John Snow <jsnow@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit d90d5cae2b10efc0e8d0b3cc91ff16201853d3ba)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
block/create.c | 6 +++++-
|
||||
1 file changed, 5 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/block/create.c b/block/create.c
|
||||
index 95341219ef..de5e97bb18 100644
|
||||
--- a/block/create.c
|
||||
+++ b/block/create.c
|
||||
@@ -63,9 +63,13 @@ void qmp_blockdev_create(const char *job_id, BlockdevCreateOptions *options,
|
||||
const char *fmt = BlockdevDriver_str(options->driver);
|
||||
BlockDriver *drv = bdrv_find_format(fmt);
|
||||
|
||||
+ if (!drv) {
|
||||
+ error_setg(errp, "Block driver '%s' not found or not supported", fmt);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
/* If the driver is in the schema, we know that it exists. But it may not
|
||||
* be whitelisted. */
|
||||
- assert(drv);
|
||||
if (bdrv_uses_whitelist() && !bdrv_is_whitelisted(drv, false)) {
|
||||
error_setg(errp, "Driver is not whitelisted");
|
||||
return;
|
||||
--
|
||||
2.23.0
|
||||
69
block-file-posix-Let-post-EOF-fallocate-serialize.patch
Normal file
69
block-file-posix-Let-post-EOF-fallocate-serialize.patch
Normal file
@@ -0,0 +1,69 @@
|
||||
From 7db05c8a732fbdc986a40aadf0de6dd23057d044 Mon Sep 17 00:00:00 2001
|
||||
From: Max Reitz <mreitz@redhat.com>
|
||||
Date: Fri, 1 Nov 2019 16:25:10 +0100
|
||||
Subject: [PATCH] block/file-posix: Let post-EOF fallocate serialize
|
||||
|
||||
The XFS kernel driver has a bug that may cause data corruption for qcow2
|
||||
images as of qemu commit c8bb23cbdbe32f. We can work around it by
|
||||
treating post-EOF fallocates as serializing up until infinity (INT64_MAX
|
||||
in practice).
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Max Reitz <mreitz@redhat.com>
|
||||
Message-id: 20191101152510.11719-4-mreitz@redhat.com
|
||||
Signed-off-by: Max Reitz <mreitz@redhat.com>
|
||||
(cherry picked from commit 292d06b925b2787ee6f2430996b95651cae42fce)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
block/file-posix.c | 36 ++++++++++++++++++++++++++++++++++++
|
||||
1 file changed, 36 insertions(+)
|
||||
|
||||
diff --git a/block/file-posix.c b/block/file-posix.c
|
||||
index 992eb4a798..c5df61b477 100644
|
||||
--- a/block/file-posix.c
|
||||
+++ b/block/file-posix.c
|
||||
@@ -2623,6 +2623,42 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes,
|
||||
RawPosixAIOData acb;
|
||||
ThreadPoolFunc *handler;
|
||||
|
||||
+#ifdef CONFIG_FALLOCATE
|
||||
+ if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) {
|
||||
+ BdrvTrackedRequest *req;
|
||||
+ uint64_t end;
|
||||
+
|
||||
+ /*
|
||||
+ * This is a workaround for a bug in the Linux XFS driver,
|
||||
+ * where writes submitted through the AIO interface will be
|
||||
+ * discarded if they happen beyond a concurrently running
|
||||
+ * fallocate() that increases the file length (i.e., both the
|
||||
+ * write and the fallocate() happen beyond the EOF).
|
||||
+ *
|
||||
+ * To work around it, we extend the tracked request for this
|
||||
+ * zero write until INT64_MAX (effectively infinity), and mark
|
||||
+ * it as serializing.
|
||||
+ *
|
||||
+ * We have to enable this workaround for all filesystems and
|
||||
+ * AIO modes (not just XFS with aio=native), because for
|
||||
+ * remote filesystems we do not know the host configuration.
|
||||
+ */
|
||||
+
|
||||
+ req = bdrv_co_get_self_request(bs);
|
||||
+ assert(req);
|
||||
+ assert(req->type == BDRV_TRACKED_WRITE);
|
||||
+ assert(req->offset <= offset);
|
||||
+ assert(req->offset + req->bytes >= offset + bytes);
|
||||
+
|
||||
+ end = INT64_MAX & -(uint64_t)bs->bl.request_alignment;
|
||||
+ req->bytes = end - req->offset;
|
||||
+ req->overlap_bytes = req->bytes;
|
||||
+
|
||||
+ bdrv_mark_request_serialising(req, bs->bl.request_alignment);
|
||||
+ bdrv_wait_serialising_requests(req);
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
acb = (RawPosixAIOData) {
|
||||
.bs = bs,
|
||||
.aio_fildes = s->fd,
|
||||
--
|
||||
2.23.0
|
||||
165
block-file-posix-Reduce-xfsctl-use.patch
Normal file
165
block-file-posix-Reduce-xfsctl-use.patch
Normal file
@@ -0,0 +1,165 @@
|
||||
From 6f1a94035b02d3676a897ea5fa4cda4c62128228 Mon Sep 17 00:00:00 2001
|
||||
From: Max Reitz <mreitz@redhat.com>
|
||||
Date: Fri, 23 Aug 2019 15:03:40 +0200
|
||||
Subject: [PATCH] block/file-posix: Reduce xfsctl() use
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
This patch removes xfs_write_zeroes() and xfs_discard(). Both functions
|
||||
have been added just before the same feature was present through
|
||||
fallocate():
|
||||
|
||||
- fallocate() has supported PUNCH_HOLE for XFS since Linux 2.6.38 (March
|
||||
2011); xfs_discard() was added in December 2010.
|
||||
|
||||
- fallocate() has supported ZERO_RANGE for XFS since Linux 3.15 (June
|
||||
2014); xfs_write_zeroes() was added in November 2013.
|
||||
|
||||
Nowadays, all systems that qemu runs on should support both fallocate()
|
||||
features (RHEL 7's kernel does).
|
||||
|
||||
xfsctl() is still useful for getting the request alignment for O_DIRECT,
|
||||
so this patch does not remove our dependency on it completely.
|
||||
|
||||
Note that xfs_write_zeroes() had a bug: It calls ftruncate() when the
|
||||
file is shorter than the specified range (because ZERO_RANGE does not
|
||||
increase the file length). ftruncate() may yield and then discard data
|
||||
that parallel write requests have written past the EOF in the meantime.
|
||||
Dropping the function altogether fixes the bug.
|
||||
|
||||
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Fixes: 50ba5b2d994853b38fed10e0841b119da0f8b8e5
|
||||
Reported-by: Lukáš Doktor <ldoktor@redhat.com>
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Max Reitz <mreitz@redhat.com>
|
||||
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Reviewed-by: John Snow <jsnow@redhat.com>
|
||||
Tested-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
Tested-by: John Snow <jsnow@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit b2c6f23f4a9f6d8f1b648705cd46d3713b78d6a2)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
block/file-posix.c | 77 +---------------------------------------------
|
||||
1 file changed, 1 insertion(+), 76 deletions(-)
|
||||
|
||||
diff --git a/block/file-posix.c b/block/file-posix.c
|
||||
index 4479cc7ab4..992eb4a798 100644
|
||||
--- a/block/file-posix.c
|
||||
+++ b/block/file-posix.c
|
||||
@@ -1445,59 +1445,6 @@ out:
|
||||
}
|
||||
}
|
||||
|
||||
-#ifdef CONFIG_XFS
|
||||
-static int xfs_write_zeroes(BDRVRawState *s, int64_t offset, uint64_t bytes)
|
||||
-{
|
||||
- int64_t len;
|
||||
- struct xfs_flock64 fl;
|
||||
- int err;
|
||||
-
|
||||
- len = lseek(s->fd, 0, SEEK_END);
|
||||
- if (len < 0) {
|
||||
- return -errno;
|
||||
- }
|
||||
-
|
||||
- if (offset + bytes > len) {
|
||||
- /* XFS_IOC_ZERO_RANGE does not increase the file length */
|
||||
- if (ftruncate(s->fd, offset + bytes) < 0) {
|
||||
- return -errno;
|
||||
- }
|
||||
- }
|
||||
-
|
||||
- memset(&fl, 0, sizeof(fl));
|
||||
- fl.l_whence = SEEK_SET;
|
||||
- fl.l_start = offset;
|
||||
- fl.l_len = bytes;
|
||||
-
|
||||
- if (xfsctl(NULL, s->fd, XFS_IOC_ZERO_RANGE, &fl) < 0) {
|
||||
- err = errno;
|
||||
- trace_file_xfs_write_zeroes(strerror(errno));
|
||||
- return -err;
|
||||
- }
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
|
||||
-{
|
||||
- struct xfs_flock64 fl;
|
||||
- int err;
|
||||
-
|
||||
- memset(&fl, 0, sizeof(fl));
|
||||
- fl.l_whence = SEEK_SET;
|
||||
- fl.l_start = offset;
|
||||
- fl.l_len = bytes;
|
||||
-
|
||||
- if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
|
||||
- err = errno;
|
||||
- trace_file_xfs_discard(strerror(errno));
|
||||
- return -err;
|
||||
- }
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-#endif
|
||||
-
|
||||
static int translate_err(int err)
|
||||
{
|
||||
if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP ||
|
||||
@@ -1553,10 +1500,8 @@ static ssize_t handle_aiocb_write_zeroes_block(RawPosixAIOData *aiocb)
|
||||
static int handle_aiocb_write_zeroes(void *opaque)
|
||||
{
|
||||
RawPosixAIOData *aiocb = opaque;
|
||||
-#if defined(CONFIG_FALLOCATE) || defined(CONFIG_XFS)
|
||||
- BDRVRawState *s = aiocb->bs->opaque;
|
||||
-#endif
|
||||
#ifdef CONFIG_FALLOCATE
|
||||
+ BDRVRawState *s = aiocb->bs->opaque;
|
||||
int64_t len;
|
||||
#endif
|
||||
|
||||
@@ -1564,12 +1509,6 @@ static int handle_aiocb_write_zeroes(void *opaque)
|
||||
return handle_aiocb_write_zeroes_block(aiocb);
|
||||
}
|
||||
|
||||
-#ifdef CONFIG_XFS
|
||||
- if (s->is_xfs) {
|
||||
- return xfs_write_zeroes(s, aiocb->aio_offset, aiocb->aio_nbytes);
|
||||
- }
|
||||
-#endif
|
||||
-
|
||||
#ifdef CONFIG_FALLOCATE_ZERO_RANGE
|
||||
if (s->has_write_zeroes) {
|
||||
int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE,
|
||||
@@ -1632,14 +1571,6 @@ static int handle_aiocb_write_zeroes_unmap(void *opaque)
|
||||
}
|
||||
#endif
|
||||
|
||||
-#ifdef CONFIG_XFS
|
||||
- if (s->is_xfs) {
|
||||
- /* xfs_discard() guarantees that the discarded area reads as all-zero
|
||||
- * afterwards, so we can use it here. */
|
||||
- return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes);
|
||||
- }
|
||||
-#endif
|
||||
-
|
||||
/* If we couldn't manage to unmap while guaranteed that the area reads as
|
||||
* all-zero afterwards, just write zeroes without unmapping */
|
||||
ret = handle_aiocb_write_zeroes(aiocb);
|
||||
@@ -1716,12 +1647,6 @@ static int handle_aiocb_discard(void *opaque)
|
||||
ret = -errno;
|
||||
#endif
|
||||
} else {
|
||||
-#ifdef CONFIG_XFS
|
||||
- if (s->is_xfs) {
|
||||
- return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes);
|
||||
- }
|
||||
-#endif
|
||||
-
|
||||
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
|
||||
ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
|
||||
aiocb->aio_offset, aiocb->aio_nbytes);
|
||||
--
|
||||
2.23.0
|
||||
481
block-io-refactor-padding.patch
Normal file
481
block-io-refactor-padding.patch
Normal file
@@ -0,0 +1,481 @@
|
||||
From 2e2ad02f2cecf419eaad0df982ceb5b41170cc7e Mon Sep 17 00:00:00 2001
|
||||
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
|
||||
Date: Tue, 4 Jun 2019 19:15:05 +0300
|
||||
Subject: [PATCH] block/io: refactor padding
|
||||
|
||||
We have similar padding code in bdrv_co_pwritev,
|
||||
bdrv_co_do_pwrite_zeroes and bdrv_co_preadv. Let's combine and unify
|
||||
it.
|
||||
|
||||
[Squashed in Vladimir's qemu-iotests 077 fix
|
||||
--Stefan]
|
||||
|
||||
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
|
||||
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-id: 20190604161514.262241-4-vsementsov@virtuozzo.com
|
||||
Message-Id: <20190604161514.262241-4-vsementsov@virtuozzo.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit 7a3f542fbdfd799be4fa6f8b96dc8c1e6933fce4)
|
||||
*prereq for 292d06b9
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
block/io.c | 365 +++++++++++++++++++++++++++++------------------------
|
||||
1 file changed, 200 insertions(+), 165 deletions(-)
|
||||
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index dccf687acc..07d2d825c3 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -1408,28 +1408,177 @@ out:
|
||||
}
|
||||
|
||||
/*
|
||||
- * Handle a read request in coroutine context
|
||||
+ * Request padding
|
||||
+ *
|
||||
+ * |<---- align ----->| |<----- align ---->|
|
||||
+ * |<- head ->|<------------- bytes ------------->|<-- tail -->|
|
||||
+ * | | | | | |
|
||||
+ * -*----------$-------*-------- ... --------*-----$------------*---
|
||||
+ * | | | | | |
|
||||
+ * | offset | | end |
|
||||
+ * ALIGN_DOWN(offset) ALIGN_UP(offset) ALIGN_DOWN(end) ALIGN_UP(end)
|
||||
+ * [buf ... ) [tail_buf )
|
||||
+ *
|
||||
+ * @buf is an aligned allocation needed to store @head and @tail paddings. @head
|
||||
+ * is placed at the beginning of @buf and @tail at the @end.
|
||||
+ *
|
||||
+ * @tail_buf is a pointer to sub-buffer, corresponding to align-sized chunk
|
||||
+ * around tail, if tail exists.
|
||||
+ *
|
||||
+ * @merge_reads is true for small requests,
|
||||
+ * if @buf_len == @head + bytes + @tail. In this case it is possible that both
|
||||
+ * head and tail exist but @buf_len == align and @tail_buf == @buf.
|
||||
+ */
|
||||
+typedef struct BdrvRequestPadding {
|
||||
+ uint8_t *buf;
|
||||
+ size_t buf_len;
|
||||
+ uint8_t *tail_buf;
|
||||
+ size_t head;
|
||||
+ size_t tail;
|
||||
+ bool merge_reads;
|
||||
+ QEMUIOVector local_qiov;
|
||||
+} BdrvRequestPadding;
|
||||
+
|
||||
+static bool bdrv_init_padding(BlockDriverState *bs,
|
||||
+ int64_t offset, int64_t bytes,
|
||||
+ BdrvRequestPadding *pad)
|
||||
+{
|
||||
+ uint64_t align = bs->bl.request_alignment;
|
||||
+ size_t sum;
|
||||
+
|
||||
+ memset(pad, 0, sizeof(*pad));
|
||||
+
|
||||
+ pad->head = offset & (align - 1);
|
||||
+ pad->tail = ((offset + bytes) & (align - 1));
|
||||
+ if (pad->tail) {
|
||||
+ pad->tail = align - pad->tail;
|
||||
+ }
|
||||
+
|
||||
+ if ((!pad->head && !pad->tail) || !bytes) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ sum = pad->head + bytes + pad->tail;
|
||||
+ pad->buf_len = (sum > align && pad->head && pad->tail) ? 2 * align : align;
|
||||
+ pad->buf = qemu_blockalign(bs, pad->buf_len);
|
||||
+ pad->merge_reads = sum == pad->buf_len;
|
||||
+ if (pad->tail) {
|
||||
+ pad->tail_buf = pad->buf + pad->buf_len - align;
|
||||
+ }
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static int bdrv_padding_rmw_read(BdrvChild *child,
|
||||
+ BdrvTrackedRequest *req,
|
||||
+ BdrvRequestPadding *pad,
|
||||
+ bool zero_middle)
|
||||
+{
|
||||
+ QEMUIOVector local_qiov;
|
||||
+ BlockDriverState *bs = child->bs;
|
||||
+ uint64_t align = bs->bl.request_alignment;
|
||||
+ int ret;
|
||||
+
|
||||
+ assert(req->serialising && pad->buf);
|
||||
+
|
||||
+ if (pad->head || pad->merge_reads) {
|
||||
+ uint64_t bytes = pad->merge_reads ? pad->buf_len : align;
|
||||
+
|
||||
+ qemu_iovec_init_buf(&local_qiov, pad->buf, bytes);
|
||||
+
|
||||
+ if (pad->head) {
|
||||
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
|
||||
+ }
|
||||
+ if (pad->merge_reads && pad->tail) {
|
||||
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
|
||||
+ }
|
||||
+ ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes,
|
||||
+ align, &local_qiov, 0);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+ if (pad->head) {
|
||||
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
|
||||
+ }
|
||||
+ if (pad->merge_reads && pad->tail) {
|
||||
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
|
||||
+ }
|
||||
+
|
||||
+ if (pad->merge_reads) {
|
||||
+ goto zero_mem;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (pad->tail) {
|
||||
+ qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align);
|
||||
+
|
||||
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
|
||||
+ ret = bdrv_aligned_preadv(
|
||||
+ child, req,
|
||||
+ req->overlap_offset + req->overlap_bytes - align,
|
||||
+ align, align, &local_qiov, 0);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
|
||||
+ }
|
||||
+
|
||||
+zero_mem:
|
||||
+ if (zero_middle) {
|
||||
+ memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static void bdrv_padding_destroy(BdrvRequestPadding *pad)
|
||||
+{
|
||||
+ if (pad->buf) {
|
||||
+ qemu_vfree(pad->buf);
|
||||
+ qemu_iovec_destroy(&pad->local_qiov);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * bdrv_pad_request
|
||||
+ *
|
||||
+ * Exchange request parameters with padded request if needed. Don't include RMW
|
||||
+ * read of padding, bdrv_padding_rmw_read() should be called separately if
|
||||
+ * needed.
|
||||
+ *
|
||||
+ * All parameters except @bs are in-out: they represent original request at
|
||||
+ * function call and padded (if padding needed) at function finish.
|
||||
+ *
|
||||
+ * Function always succeeds.
|
||||
*/
|
||||
+static bool bdrv_pad_request(BlockDriverState *bs, QEMUIOVector **qiov,
|
||||
+ int64_t *offset, unsigned int *bytes,
|
||||
+ BdrvRequestPadding *pad)
|
||||
+{
|
||||
+ if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
|
||||
+ *qiov, 0, *bytes,
|
||||
+ pad->buf + pad->buf_len - pad->tail, pad->tail);
|
||||
+ *bytes += pad->head + pad->tail;
|
||||
+ *offset -= pad->head;
|
||||
+ *qiov = &pad->local_qiov;
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
int coroutine_fn bdrv_co_preadv(BdrvChild *child,
|
||||
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
|
||||
BdrvRequestFlags flags)
|
||||
{
|
||||
BlockDriverState *bs = child->bs;
|
||||
- BlockDriver *drv = bs->drv;
|
||||
BdrvTrackedRequest req;
|
||||
-
|
||||
- uint64_t align = bs->bl.request_alignment;
|
||||
- uint8_t *head_buf = NULL;
|
||||
- uint8_t *tail_buf = NULL;
|
||||
- QEMUIOVector local_qiov;
|
||||
- bool use_local_qiov = false;
|
||||
+ BdrvRequestPadding pad;
|
||||
int ret;
|
||||
|
||||
- trace_bdrv_co_preadv(child->bs, offset, bytes, flags);
|
||||
-
|
||||
- if (!drv) {
|
||||
- return -ENOMEDIUM;
|
||||
- }
|
||||
+ trace_bdrv_co_preadv(bs, offset, bytes, flags);
|
||||
|
||||
ret = bdrv_check_byte_request(bs, offset, bytes);
|
||||
if (ret < 0) {
|
||||
@@ -1443,43 +1592,16 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
|
||||
flags |= BDRV_REQ_COPY_ON_READ;
|
||||
}
|
||||
|
||||
- /* Align read if necessary by padding qiov */
|
||||
- if (offset & (align - 1)) {
|
||||
- head_buf = qemu_blockalign(bs, align);
|
||||
- qemu_iovec_init(&local_qiov, qiov->niov + 2);
|
||||
- qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
|
||||
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
|
||||
- use_local_qiov = true;
|
||||
-
|
||||
- bytes += offset & (align - 1);
|
||||
- offset = offset & ~(align - 1);
|
||||
- }
|
||||
-
|
||||
- if ((offset + bytes) & (align - 1)) {
|
||||
- if (!use_local_qiov) {
|
||||
- qemu_iovec_init(&local_qiov, qiov->niov + 1);
|
||||
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
|
||||
- use_local_qiov = true;
|
||||
- }
|
||||
- tail_buf = qemu_blockalign(bs, align);
|
||||
- qemu_iovec_add(&local_qiov, tail_buf,
|
||||
- align - ((offset + bytes) & (align - 1)));
|
||||
-
|
||||
- bytes = ROUND_UP(bytes, align);
|
||||
- }
|
||||
+ bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad);
|
||||
|
||||
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
|
||||
- ret = bdrv_aligned_preadv(child, &req, offset, bytes, align,
|
||||
- use_local_qiov ? &local_qiov : qiov,
|
||||
- flags);
|
||||
+ ret = bdrv_aligned_preadv(child, &req, offset, bytes,
|
||||
+ bs->bl.request_alignment,
|
||||
+ qiov, flags);
|
||||
tracked_request_end(&req);
|
||||
bdrv_dec_in_flight(bs);
|
||||
|
||||
- if (use_local_qiov) {
|
||||
- qemu_iovec_destroy(&local_qiov);
|
||||
- qemu_vfree(head_buf);
|
||||
- qemu_vfree(tail_buf);
|
||||
- }
|
||||
+ bdrv_padding_destroy(&pad);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -1775,44 +1897,34 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
|
||||
BdrvTrackedRequest *req)
|
||||
{
|
||||
BlockDriverState *bs = child->bs;
|
||||
- uint8_t *buf = NULL;
|
||||
QEMUIOVector local_qiov;
|
||||
uint64_t align = bs->bl.request_alignment;
|
||||
- unsigned int head_padding_bytes, tail_padding_bytes;
|
||||
int ret = 0;
|
||||
+ bool padding;
|
||||
+ BdrvRequestPadding pad;
|
||||
|
||||
- head_padding_bytes = offset & (align - 1);
|
||||
- tail_padding_bytes = (align - (offset + bytes)) & (align - 1);
|
||||
-
|
||||
-
|
||||
- assert(flags & BDRV_REQ_ZERO_WRITE);
|
||||
- if (head_padding_bytes || tail_padding_bytes) {
|
||||
- buf = qemu_blockalign(bs, align);
|
||||
- qemu_iovec_init_buf(&local_qiov, buf, align);
|
||||
- }
|
||||
- if (head_padding_bytes) {
|
||||
- uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes);
|
||||
-
|
||||
- /* RMW the unaligned part before head. */
|
||||
+ padding = bdrv_init_padding(bs, offset, bytes, &pad);
|
||||
+ if (padding) {
|
||||
mark_request_serialising(req, align);
|
||||
wait_serialising_requests(req);
|
||||
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
|
||||
- ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align,
|
||||
- align, &local_qiov, 0);
|
||||
- if (ret < 0) {
|
||||
- goto fail;
|
||||
- }
|
||||
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
|
||||
|
||||
- memset(buf + head_padding_bytes, 0, zero_bytes);
|
||||
- ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align,
|
||||
- align, &local_qiov,
|
||||
- flags & ~BDRV_REQ_ZERO_WRITE);
|
||||
- if (ret < 0) {
|
||||
- goto fail;
|
||||
+ bdrv_padding_rmw_read(child, req, &pad, true);
|
||||
+
|
||||
+ if (pad.head || pad.merge_reads) {
|
||||
+ int64_t aligned_offset = offset & ~(align - 1);
|
||||
+ int64_t write_bytes = pad.merge_reads ? pad.buf_len : align;
|
||||
+
|
||||
+ qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes);
|
||||
+ ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes,
|
||||
+ align, &local_qiov,
|
||||
+ flags & ~BDRV_REQ_ZERO_WRITE);
|
||||
+ if (ret < 0 || pad.merge_reads) {
|
||||
+ /* Error or all work is done */
|
||||
+ goto out;
|
||||
+ }
|
||||
+ offset += write_bytes - pad.head;
|
||||
+ bytes -= write_bytes - pad.head;
|
||||
}
|
||||
- offset += zero_bytes;
|
||||
- bytes -= zero_bytes;
|
||||
}
|
||||
|
||||
assert(!bytes || (offset & (align - 1)) == 0);
|
||||
@@ -1822,7 +1934,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
|
||||
ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
|
||||
NULL, flags);
|
||||
if (ret < 0) {
|
||||
- goto fail;
|
||||
+ goto out;
|
||||
}
|
||||
bytes -= aligned_bytes;
|
||||
offset += aligned_bytes;
|
||||
@@ -1830,26 +1942,17 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
|
||||
|
||||
assert(!bytes || (offset & (align - 1)) == 0);
|
||||
if (bytes) {
|
||||
- assert(align == tail_padding_bytes + bytes);
|
||||
- /* RMW the unaligned part after tail. */
|
||||
- mark_request_serialising(req, align);
|
||||
- wait_serialising_requests(req);
|
||||
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
|
||||
- ret = bdrv_aligned_preadv(child, req, offset, align,
|
||||
- align, &local_qiov, 0);
|
||||
- if (ret < 0) {
|
||||
- goto fail;
|
||||
- }
|
||||
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
|
||||
+ assert(align == pad.tail + bytes);
|
||||
|
||||
- memset(buf, 0, bytes);
|
||||
+ qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align);
|
||||
ret = bdrv_aligned_pwritev(child, req, offset, align, align,
|
||||
&local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
|
||||
}
|
||||
-fail:
|
||||
- qemu_vfree(buf);
|
||||
- return ret;
|
||||
|
||||
+out:
|
||||
+ bdrv_padding_destroy(&pad);
|
||||
+
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1862,10 +1965,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
|
||||
BlockDriverState *bs = child->bs;
|
||||
BdrvTrackedRequest req;
|
||||
uint64_t align = bs->bl.request_alignment;
|
||||
- uint8_t *head_buf = NULL;
|
||||
- uint8_t *tail_buf = NULL;
|
||||
- QEMUIOVector local_qiov;
|
||||
- bool use_local_qiov = false;
|
||||
+ BdrvRequestPadding pad;
|
||||
int ret;
|
||||
|
||||
trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);
|
||||
@@ -1892,86 +1992,21 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
|
||||
goto out;
|
||||
}
|
||||
|
||||
- if (offset & (align - 1)) {
|
||||
- QEMUIOVector head_qiov;
|
||||
-
|
||||
+ if (bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad)) {
|
||||
mark_request_serialising(&req, align);
|
||||
wait_serialising_requests(&req);
|
||||
-
|
||||
- head_buf = qemu_blockalign(bs, align);
|
||||
- qemu_iovec_init_buf(&head_qiov, head_buf, align);
|
||||
-
|
||||
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
|
||||
- ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align,
|
||||
- align, &head_qiov, 0);
|
||||
- if (ret < 0) {
|
||||
- goto fail;
|
||||
- }
|
||||
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
|
||||
-
|
||||
- qemu_iovec_init(&local_qiov, qiov->niov + 2);
|
||||
- qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
|
||||
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
|
||||
- use_local_qiov = true;
|
||||
-
|
||||
- bytes += offset & (align - 1);
|
||||
- offset = offset & ~(align - 1);
|
||||
-
|
||||
- /* We have read the tail already if the request is smaller
|
||||
- * than one aligned block.
|
||||
- */
|
||||
- if (bytes < align) {
|
||||
- qemu_iovec_add(&local_qiov, head_buf + bytes, align - bytes);
|
||||
- bytes = align;
|
||||
- }
|
||||
- }
|
||||
-
|
||||
- if ((offset + bytes) & (align - 1)) {
|
||||
- QEMUIOVector tail_qiov;
|
||||
- size_t tail_bytes;
|
||||
- bool waited;
|
||||
-
|
||||
- mark_request_serialising(&req, align);
|
||||
- waited = wait_serialising_requests(&req);
|
||||
- assert(!waited || !use_local_qiov);
|
||||
-
|
||||
- tail_buf = qemu_blockalign(bs, align);
|
||||
- qemu_iovec_init_buf(&tail_qiov, tail_buf, align);
|
||||
-
|
||||
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
|
||||
- ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1),
|
||||
- align, align, &tail_qiov, 0);
|
||||
- if (ret < 0) {
|
||||
- goto fail;
|
||||
- }
|
||||
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
|
||||
-
|
||||
- if (!use_local_qiov) {
|
||||
- qemu_iovec_init(&local_qiov, qiov->niov + 1);
|
||||
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
|
||||
- use_local_qiov = true;
|
||||
- }
|
||||
-
|
||||
- tail_bytes = (offset + bytes) & (align - 1);
|
||||
- qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
|
||||
-
|
||||
- bytes = ROUND_UP(bytes, align);
|
||||
+ bdrv_padding_rmw_read(child, &req, &pad, false);
|
||||
}
|
||||
|
||||
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
|
||||
- use_local_qiov ? &local_qiov : qiov,
|
||||
- flags);
|
||||
+ qiov, flags);
|
||||
|
||||
-fail:
|
||||
+ bdrv_padding_destroy(&pad);
|
||||
|
||||
- if (use_local_qiov) {
|
||||
- qemu_iovec_destroy(&local_qiov);
|
||||
- }
|
||||
- qemu_vfree(head_buf);
|
||||
- qemu_vfree(tail_buf);
|
||||
out:
|
||||
tracked_request_end(&req);
|
||||
bdrv_dec_in_flight(bs);
|
||||
+
|
||||
return ret;
|
||||
}
|
||||
|
||||
--
|
||||
2.23.0
|
||||
41
block-nfs-tear-down-aio-before-nfs_close.patch
Normal file
41
block-nfs-tear-down-aio-before-nfs_close.patch
Normal file
@ -0,0 +1,41 @@
|
||||
From 0694c489cd240620fee5675e8d24c7ce02d1d67d Mon Sep 17 00:00:00 2001
|
||||
From: Peter Lieven <pl@kamp.de>
|
||||
Date: Tue, 10 Sep 2019 17:41:09 +0200
|
||||
Subject: [PATCH] block/nfs: tear down aio before nfs_close
|
||||
|
||||
nfs_close is a sync call from libnfs and has its own event
|
||||
handler polling on the nfs FD. Avoid that both QEMU and libnfs
|
||||
are interfering here.
|
||||
|
||||
CC: qemu-stable@nongnu.org
|
||||
Signed-off-by: Peter Lieven <pl@kamp.de>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 601dc6559725f7a614b6f893611e17ff0908e914)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
block/nfs.c | 6 ++++--
|
||||
1 file changed, 4 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/block/nfs.c b/block/nfs.c
|
||||
index d93241b3bb..2b7a078241 100644
|
||||
--- a/block/nfs.c
|
||||
+++ b/block/nfs.c
|
||||
@@ -390,12 +390,14 @@ static void nfs_attach_aio_context(BlockDriverState *bs,
|
||||
static void nfs_client_close(NFSClient *client)
|
||||
{
|
||||
if (client->context) {
|
||||
+ qemu_mutex_lock(&client->mutex);
|
||||
+ aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
|
||||
+ false, NULL, NULL, NULL, NULL);
|
||||
+ qemu_mutex_unlock(&client->mutex);
|
||||
if (client->fh) {
|
||||
nfs_close(client->context, client->fh);
|
||||
client->fh = NULL;
|
||||
}
|
||||
- aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
|
||||
- false, NULL, NULL, NULL, NULL);
|
||||
nfs_destroy_context(client->context);
|
||||
client->context = NULL;
|
||||
}
|
||||
--
|
||||
2.23.0
|
||||
343
block-posix-Always-allocate-the-first-block.patch
Normal file
343
block-posix-Always-allocate-the-first-block.patch
Normal file
@ -0,0 +1,343 @@
|
||||
From 3d018ff3bdd8aec260254036b600cfa8d694ced4 Mon Sep 17 00:00:00 2001
|
||||
From: Nir Soffer <nirsof@gmail.com>
|
||||
Date: Tue, 27 Aug 2019 04:05:27 +0300
|
||||
Subject: [PATCH] block: posix: Always allocate the first block
|
||||
|
||||
When creating an image with preallocation "off" or "falloc", the first
|
||||
block of the image is typically not allocated. When using Gluster
|
||||
storage backed by XFS filesystem, reading this block using direct I/O
|
||||
succeeds regardless of request length, fooling alignment detection.
|
||||
|
||||
In this case we fall back to a safe value (4096) instead of the optimal
|
||||
value (512), which may lead to unneeded data copying when aligning
|
||||
requests. Allocating the first block avoids the fallback.
|
||||
|
||||
Since we allocate the first block even with preallocation=off, we no
|
||||
longer create images with zero disk size:
|
||||
|
||||
$ ./qemu-img create -f raw test.raw 1g
|
||||
Formatting 'test.raw', fmt=raw size=1073741824
|
||||
|
||||
$ ls -lhs test.raw
|
||||
4.0K -rw-r--r--. 1 nsoffer nsoffer 1.0G Aug 16 23:48 test.raw
|
||||
|
||||
And converting the image requires additional cluster:
|
||||
|
||||
$ ./qemu-img measure -f raw -O qcow2 test.raw
|
||||
required size: 458752
|
||||
fully allocated size: 1074135040
|
||||
|
||||
When using format like vmdk with multiple files per image, we allocate
|
||||
one block per file:
|
||||
|
||||
$ ./qemu-img create -f vmdk -o subformat=twoGbMaxExtentFlat test.vmdk 4g
|
||||
Formatting 'test.vmdk', fmt=vmdk size=4294967296 compat6=off hwversion=undefined subformat=twoGbMaxExtentFlat
|
||||
|
||||
$ ls -lhs test*.vmdk
|
||||
4.0K -rw-r--r--. 1 nsoffer nsoffer 2.0G Aug 27 03:23 test-f001.vmdk
|
||||
4.0K -rw-r--r--. 1 nsoffer nsoffer 2.0G Aug 27 03:23 test-f002.vmdk
|
||||
4.0K -rw-r--r--. 1 nsoffer nsoffer 353 Aug 27 03:23 test.vmdk
|
||||
|
||||
I did quick performance test for copying disks with qemu-img convert to
|
||||
new raw target image to Gluster storage with sector size of 512 bytes:
|
||||
|
||||
for i in $(seq 10); do
|
||||
rm -f dst.raw
|
||||
sleep 10
|
||||
time ./qemu-img convert -f raw -O raw -t none -T none src.raw dst.raw
|
||||
done
|
||||
|
||||
Here is a table comparing the total time spent:
|
||||
|
||||
Type Before(s) After(s) Diff(%)
|
||||
---------------------------------------
|
||||
real 530.028 469.123 -11.4
|
||||
user 17.204 10.768 -37.4
|
||||
sys 17.881 7.011 -60.7
|
||||
|
||||
We can see very clear improvement in CPU usage.
|
||||
|
||||
Signed-off-by: Nir Soffer <nsoffer@redhat.com>
|
||||
Message-id: 20190827010528.8818-2-nsoffer@redhat.com
|
||||
Reviewed-by: Max Reitz <mreitz@redhat.com>
|
||||
Signed-off-by: Max Reitz <mreitz@redhat.com>
|
||||
|
||||
(cherry picked from commit 3a20013fbb26d2a1bd11ef148eefdb1508783787)
|
||||
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
block/file-posix.c | 51 +++++++++++++++++++
|
||||
tests/qemu-iotests/059.out | 2 +-
|
||||
tests/qemu-iotests/{150.out => 150.out.qcow2} | 0
|
||||
tests/qemu-iotests/150.out.raw | 12 +++++
|
||||
tests/qemu-iotests/175 | 19 ++++---
|
||||
tests/qemu-iotests/175.out | 8 +--
|
||||
tests/qemu-iotests/178.out.qcow2 | 4 +-
|
||||
tests/qemu-iotests/221.out | 12 +++--
|
||||
tests/qemu-iotests/253.out | 12 +++--
|
||||
9 files changed, 99 insertions(+), 21 deletions(-)
|
||||
rename tests/qemu-iotests/{150.out => 150.out.qcow2} (100%)
|
||||
create mode 100644 tests/qemu-iotests/150.out.raw
|
||||
|
||||
diff --git a/block/file-posix.c b/block/file-posix.c
|
||||
index be32dd8c51..2184aa980c 100644
|
||||
--- a/block/file-posix.c
|
||||
+++ b/block/file-posix.c
|
||||
@@ -1674,6 +1674,43 @@ static int handle_aiocb_discard(void *opaque)
|
||||
return ret;
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Help alignment probing by allocating the first block.
|
||||
+ *
|
||||
+ * When reading with direct I/O from unallocated area on Gluster backed by XFS,
|
||||
+ * reading succeeds regardless of request length. In this case we fallback to
|
||||
+ * safe alignment which is not optimal. Allocating the first block avoids this
|
||||
+ * fallback.
|
||||
+ *
|
||||
+ * fd may be opened with O_DIRECT, but we don't know the buffer alignment or
|
||||
+ * request alignment, so we use safe values.
|
||||
+ *
|
||||
+ * Returns: 0 on success, -errno on failure. Since this is an optimization,
|
||||
+ * caller may ignore failures.
|
||||
+ */
|
||||
+static int allocate_first_block(int fd, size_t max_size)
|
||||
+{
|
||||
+ size_t write_size = (max_size < MAX_BLOCKSIZE)
|
||||
+ ? BDRV_SECTOR_SIZE
|
||||
+ : MAX_BLOCKSIZE;
|
||||
+ size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize());
|
||||
+ void *buf;
|
||||
+ ssize_t n;
|
||||
+ int ret;
|
||||
+
|
||||
+ buf = qemu_memalign(max_align, write_size);
|
||||
+ memset(buf, 0, write_size);
|
||||
+
|
||||
+ do {
|
||||
+ n = pwrite(fd, buf, write_size, 0);
|
||||
+ } while (n == -1 && errno == EINTR);
|
||||
+
|
||||
+ ret = (n == -1) ? -errno : 0;
|
||||
+
|
||||
+ qemu_vfree(buf);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
static int handle_aiocb_truncate(void *opaque)
|
||||
{
|
||||
RawPosixAIOData *aiocb = opaque;
|
||||
@@ -1713,6 +1750,17 @@ static int handle_aiocb_truncate(void *opaque)
|
||||
/* posix_fallocate() doesn't set errno. */
|
||||
error_setg_errno(errp, -result,
|
||||
"Could not preallocate new data");
|
||||
+ } else if (current_length == 0) {
|
||||
+ /*
|
||||
+ * posix_fallocate() uses fallocate() if the filesystem
|
||||
+ * supports it, or fallback to manually writing zeroes. If
|
||||
+ * fallocate() was used, unaligned reads from the fallocated
|
||||
+ * area in raw_probe_alignment() will succeed, hence we need to
|
||||
+ * allocate the first block.
|
||||
+ *
|
||||
+ * Optimize future alignment probing; ignore failures.
|
||||
+ */
|
||||
+ allocate_first_block(fd, offset);
|
||||
}
|
||||
} else {
|
||||
result = 0;
|
||||
@@ -1774,6 +1822,9 @@ static int handle_aiocb_truncate(void *opaque)
|
||||
if (ftruncate(fd, offset) != 0) {
|
||||
result = -errno;
|
||||
error_setg_errno(errp, -result, "Could not resize file");
|
||||
+ } else if (current_length == 0 && offset > current_length) {
|
||||
+ /* Optimize future alignment probing; ignore failures. */
|
||||
+ allocate_first_block(fd, offset);
|
||||
}
|
||||
return result;
|
||||
default:
|
||||
diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out
|
||||
index 4fab42a28c..fe3f861f3c 100644
|
||||
--- a/tests/qemu-iotests/059.out
|
||||
+++ b/tests/qemu-iotests/059.out
|
||||
@@ -27,7 +27,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824000 subformat=twoGbMax
|
||||
image: TEST_DIR/t.vmdk
|
||||
file format: vmdk
|
||||
virtual size: 0.977 TiB (1073741824000 bytes)
|
||||
-disk size: 16 KiB
|
||||
+disk size: 1.97 MiB
|
||||
Format specific information:
|
||||
cid: XXXXXXXX
|
||||
parent cid: XXXXXXXX
|
||||
diff --git a/tests/qemu-iotests/150.out b/tests/qemu-iotests/150.out.qcow2
|
||||
similarity index 100%
|
||||
rename from tests/qemu-iotests/150.out
|
||||
rename to tests/qemu-iotests/150.out.qcow2
|
||||
diff --git a/tests/qemu-iotests/150.out.raw b/tests/qemu-iotests/150.out.raw
|
||||
new file mode 100644
|
||||
index 0000000000..3cdc7727a5
|
||||
--- /dev/null
|
||||
+++ b/tests/qemu-iotests/150.out.raw
|
||||
@@ -0,0 +1,12 @@
|
||||
+QA output created by 150
|
||||
+
|
||||
+=== Mapping sparse conversion ===
|
||||
+
|
||||
+Offset Length File
|
||||
+0 0x1000 TEST_DIR/t.IMGFMT
|
||||
+
|
||||
+=== Mapping non-sparse conversion ===
|
||||
+
|
||||
+Offset Length File
|
||||
+0 0x100000 TEST_DIR/t.IMGFMT
|
||||
+*** done
|
||||
diff --git a/tests/qemu-iotests/175 b/tests/qemu-iotests/175
|
||||
index 51e62c8276..7ba28b3c1b 100755
|
||||
--- a/tests/qemu-iotests/175
|
||||
+++ b/tests/qemu-iotests/175
|
||||
@@ -37,14 +37,16 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
|
||||
# the file size. This function hides the resulting difference in the
|
||||
# stat -c '%b' output.
|
||||
# Parameter 1: Number of blocks an empty file occupies
|
||||
-# Parameter 2: Image size in bytes
|
||||
+# Parameter 2: Minimal number of blocks in an image
|
||||
+# Parameter 3: Image size in bytes
|
||||
_filter_blocks()
|
||||
{
|
||||
extra_blocks=$1
|
||||
- img_size=$2
|
||||
+ min_blocks=$2
|
||||
+ img_size=$3
|
||||
|
||||
- sed -e "s/blocks=$extra_blocks\\(\$\\|[^0-9]\\)/nothing allocated/" \
|
||||
- -e "s/blocks=$((extra_blocks + img_size / 512))\\(\$\\|[^0-9]\\)/everything allocated/"
|
||||
+ sed -e "s/blocks=$min_blocks\\(\$\\|[^0-9]\\)/min allocation/" \
|
||||
+ -e "s/blocks=$((extra_blocks + img_size / 512))\\(\$\\|[^0-9]\\)/max allocation/"
|
||||
}
|
||||
|
||||
# get standard environment, filters and checks
|
||||
@@ -60,16 +62,21 @@ size=$((1 * 1024 * 1024))
|
||||
touch "$TEST_DIR/empty"
|
||||
extra_blocks=$(stat -c '%b' "$TEST_DIR/empty")
|
||||
|
||||
+# We always write the first byte; check how many blocks this filesystem
|
||||
+# allocates to match empty image allocation.
|
||||
+printf "\0" > "$TEST_DIR/empty"
|
||||
+min_blocks=$(stat -c '%b' "$TEST_DIR/empty")
|
||||
+
|
||||
echo
|
||||
echo "== creating image with default preallocation =="
|
||||
_make_test_img $size | _filter_imgfmt
|
||||
-stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $size
|
||||
+stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $min_blocks $size
|
||||
|
||||
for mode in off full falloc; do
|
||||
echo
|
||||
echo "== creating image with preallocation $mode =="
|
||||
IMGOPTS=preallocation=$mode _make_test_img $size | _filter_imgfmt
|
||||
- stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $size
|
||||
+ stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $min_blocks $size
|
||||
done
|
||||
|
||||
# success, all done
|
||||
diff --git a/tests/qemu-iotests/175.out b/tests/qemu-iotests/175.out
|
||||
index 6d9a5ed84e..263e521262 100644
|
||||
--- a/tests/qemu-iotests/175.out
|
||||
+++ b/tests/qemu-iotests/175.out
|
||||
@@ -2,17 +2,17 @@ QA output created by 175
|
||||
|
||||
== creating image with default preallocation ==
|
||||
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576
|
||||
-size=1048576, nothing allocated
|
||||
+size=1048576, min allocation
|
||||
|
||||
== creating image with preallocation off ==
|
||||
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 preallocation=off
|
||||
-size=1048576, nothing allocated
|
||||
+size=1048576, min allocation
|
||||
|
||||
== creating image with preallocation full ==
|
||||
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 preallocation=full
|
||||
-size=1048576, everything allocated
|
||||
+size=1048576, max allocation
|
||||
|
||||
== creating image with preallocation falloc ==
|
||||
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 preallocation=falloc
|
||||
-size=1048576, everything allocated
|
||||
+size=1048576, max allocation
|
||||
*** done
|
||||
diff --git a/tests/qemu-iotests/178.out.qcow2 b/tests/qemu-iotests/178.out.qcow2
|
||||
index 55a8dc926f..9e7d8c44df 100644
|
||||
--- a/tests/qemu-iotests/178.out.qcow2
|
||||
+++ b/tests/qemu-iotests/178.out.qcow2
|
||||
@@ -101,7 +101,7 @@ converted image file size in bytes: 196608
|
||||
== raw input image with data (human) ==
|
||||
|
||||
Formatting 'TEST_DIR/t.qcow2', fmt=IMGFMT size=1073741824
|
||||
-required size: 393216
|
||||
+required size: 458752
|
||||
fully allocated size: 1074135040
|
||||
wrote 512/512 bytes at offset 512
|
||||
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
@@ -257,7 +257,7 @@ converted image file size in bytes: 196608
|
||||
|
||||
Formatting 'TEST_DIR/t.qcow2', fmt=IMGFMT size=1073741824
|
||||
{
|
||||
- "required": 393216,
|
||||
+ "required": 458752,
|
||||
"fully-allocated": 1074135040
|
||||
}
|
||||
wrote 512/512 bytes at offset 512
|
||||
diff --git a/tests/qemu-iotests/221.out b/tests/qemu-iotests/221.out
|
||||
index 9f9dd52bb0..dca024a0c3 100644
|
||||
--- a/tests/qemu-iotests/221.out
|
||||
+++ b/tests/qemu-iotests/221.out
|
||||
@@ -3,14 +3,18 @@ QA output created by 221
|
||||
=== Check mapping of unaligned raw image ===
|
||||
|
||||
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=65537
|
||||
-[{ "start": 0, "length": 66048, "depth": 0, "zero": true, "data": false, "offset": OFFSET}]
|
||||
-[{ "start": 0, "length": 66048, "depth": 0, "zero": true, "data": false, "offset": OFFSET}]
|
||||
+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
|
||||
+{ "start": 4096, "length": 61952, "depth": 0, "zero": true, "data": false, "offset": OFFSET}]
|
||||
+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
|
||||
+{ "start": 4096, "length": 61952, "depth": 0, "zero": true, "data": false, "offset": OFFSET}]
|
||||
wrote 1/1 bytes at offset 65536
|
||||
1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
-[{ "start": 0, "length": 65536, "depth": 0, "zero": true, "data": false, "offset": OFFSET},
|
||||
+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
|
||||
+{ "start": 4096, "length": 61440, "depth": 0, "zero": true, "data": false, "offset": OFFSET},
|
||||
{ "start": 65536, "length": 1, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
|
||||
{ "start": 65537, "length": 511, "depth": 0, "zero": true, "data": false, "offset": OFFSET}]
|
||||
-[{ "start": 0, "length": 65536, "depth": 0, "zero": true, "data": false, "offset": OFFSET},
|
||||
+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
|
||||
+{ "start": 4096, "length": 61440, "depth": 0, "zero": true, "data": false, "offset": OFFSET},
|
||||
{ "start": 65536, "length": 1, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
|
||||
{ "start": 65537, "length": 511, "depth": 0, "zero": true, "data": false, "offset": OFFSET}]
|
||||
*** done
|
||||
diff --git a/tests/qemu-iotests/253.out b/tests/qemu-iotests/253.out
|
||||
index 607c0baa0b..3d08b305d7 100644
|
||||
--- a/tests/qemu-iotests/253.out
|
||||
+++ b/tests/qemu-iotests/253.out
|
||||
@@ -3,12 +3,16 @@ QA output created by 253
|
||||
=== Check mapping of unaligned raw image ===
|
||||
|
||||
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048575
|
||||
-[{ "start": 0, "length": 1048576, "depth": 0, "zero": true, "data": false, "offset": OFFSET}]
|
||||
-[{ "start": 0, "length": 1048576, "depth": 0, "zero": true, "data": false, "offset": OFFSET}]
|
||||
+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
|
||||
+{ "start": 4096, "length": 1044480, "depth": 0, "zero": true, "data": false, "offset": OFFSET}]
|
||||
+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
|
||||
+{ "start": 4096, "length": 1044480, "depth": 0, "zero": true, "data": false, "offset": OFFSET}]
|
||||
wrote 65535/65535 bytes at offset 983040
|
||||
63.999 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
-[{ "start": 0, "length": 983040, "depth": 0, "zero": true, "data": false, "offset": OFFSET},
|
||||
+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
|
||||
+{ "start": 4096, "length": 978944, "depth": 0, "zero": true, "data": false, "offset": OFFSET},
|
||||
{ "start": 983040, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": OFFSET}]
|
||||
-[{ "start": 0, "length": 983040, "depth": 0, "zero": true, "data": false, "offset": OFFSET},
|
||||
+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
|
||||
+{ "start": 4096, "length": 978944, "depth": 0, "zero": true, "data": false, "offset": OFFSET},
|
||||
{ "start": 983040, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": OFFSET}]
|
||||
*** done
|
||||
--
|
||||
2.23.0
|
||||
66
block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch
Normal file
66
block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch
Normal file
@ -0,0 +1,66 @@
|
||||
From 84f22c728520792f1010074e0d5ac2ec8e2e372c Mon Sep 17 00:00:00 2001
|
||||
From: Maxim Levitsky <mlevitsk@redhat.com>
|
||||
Date: Sun, 15 Sep 2019 23:36:53 +0300
|
||||
Subject: [PATCH] block/qcow2: Fix corruption introduced by commit 8ac0f15f335
|
||||
|
||||
This fixes subtle corruption introduced by luks threaded encryption
|
||||
in commit 8ac0f15f335
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1745922
|
||||
|
||||
The corruption happens when we do a write that
|
||||
* writes to two or more unallocated clusters at once
|
||||
* doesn't fully cover the first sector
|
||||
* doesn't fully cover the last sector
|
||||
* uses luks encryption
|
||||
|
||||
In this case, when allocating the new clusters we COW both areas
|
||||
prior to the write and after the write, and we encrypt them.
|
||||
|
||||
The above mentioned commit accidentally made it so we encrypt the
|
||||
second COW area using the physical cluster offset of the first area.
|
||||
|
||||
The problem is that offset_in_cluster in do_perform_cow_encrypt
|
||||
can be larger than the cluster size, thus cluster_offset
|
||||
will no longer point to the start of the cluster at which encrypted
|
||||
area starts.
|
||||
|
||||
Next patch in this series will refactor the code to avoid all these
|
||||
assumptions.
|
||||
|
||||
In the bugreport that was triggered by rebasing a luks image to new,
|
||||
zero filled base, which does a lot of such writes, and causes some files
|
||||
with zero areas to contain garbage there instead.
|
||||
But as described above it can happen elsewhere as well
|
||||
|
||||
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
|
||||
Message-id: 20190915203655.21638-2-mlevitsk@redhat.com
|
||||
Reviewed-by: Max Reitz <mreitz@redhat.com>
|
||||
Signed-off-by: Max Reitz <mreitz@redhat.com>
|
||||
(cherry picked from commit 38e7d54bdc518b5a05a922467304bcace2396945)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
block/qcow2-cluster.c | 7 ++++---
|
||||
1 file changed, 4 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
|
||||
index cc5609e27a..760564c8fb 100644
|
||||
--- a/block/qcow2-cluster.c
|
||||
+++ b/block/qcow2-cluster.c
|
||||
@@ -473,9 +473,10 @@ static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs,
|
||||
assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0);
|
||||
assert((bytes & ~BDRV_SECTOR_MASK) == 0);
|
||||
assert(s->crypto);
|
||||
- if (qcow2_co_encrypt(bs, cluster_offset,
|
||||
- src_cluster_offset + offset_in_cluster,
|
||||
- buffer, bytes) < 0) {
|
||||
+ if (qcow2_co_encrypt(bs,
|
||||
+ start_of_cluster(s, cluster_offset + offset_in_cluster),
|
||||
+ src_cluster_offset + offset_in_cluster,
|
||||
+ buffer, bytes) < 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
--
|
||||
2.23.0
|
||||
124
block-snapshot-Restrict-set-of-snapshot-nodes.patch
Normal file
124
block-snapshot-Restrict-set-of-snapshot-nodes.patch
Normal file
@ -0,0 +1,124 @@
|
||||
From 7a8aa6c734bb1c2927ad0cc1d10bcacb53cf4ae3 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Tue, 17 Sep 2019 12:26:23 +0200
|
||||
Subject: [PATCH] block/snapshot: Restrict set of snapshot nodes
|
||||
|
||||
Nodes involved in internal snapshots were those that were returned by
|
||||
bdrv_next(), inserted and not read-only. bdrv_next() in turn returns all
|
||||
nodes that are either the root node of a BlockBackend or monitor-owned
|
||||
nodes.
|
||||
|
||||
With the typical -drive use, this worked well enough. However, in the
|
||||
typical -blockdev case, the user defines one node per option, making all
|
||||
nodes monitor-owned nodes. This includes protocol nodes etc. which often
|
||||
are not snapshottable, so "savevm" only returns an error.
|
||||
|
||||
Change the conditions so that internal snapshot still include all nodes
|
||||
that have a BlockBackend attached (we definitely want to snapshot
|
||||
anything attached to a guest device and probably also the built-in NBD
|
||||
server; snapshotting block job BlockBackends is more of an accident, but
|
||||
a preexisting one), but other monitor-owned nodes are only included if
|
||||
they have no parents.
|
||||
|
||||
This makes internal snapshots usable again with typical -blockdev
|
||||
configurations.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Peter Krempa <pkrempa@redhat.com>
|
||||
Tested-by: Peter Krempa <pkrempa@redhat.com>
|
||||
(cherry picked from commit 05f4aced658a02b02d3e89a6c7a2281008fcf26c)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
block/snapshot.c | 26 +++++++++++++++++++-------
|
||||
1 file changed, 19 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/block/snapshot.c b/block/snapshot.c
|
||||
index f2f48f926a..8081616ae9 100644
|
||||
--- a/block/snapshot.c
|
||||
+++ b/block/snapshot.c
|
||||
@@ -31,6 +31,7 @@
|
||||
#include "qapi/qmp/qerror.h"
|
||||
#include "qapi/qmp/qstring.h"
|
||||
#include "qemu/option.h"
|
||||
+#include "sysemu/block-backend.h"
|
||||
|
||||
QemuOptsList internal_snapshot_opts = {
|
||||
.name = "snapshot",
|
||||
@@ -384,6 +385,16 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static bool bdrv_all_snapshots_includes_bs(BlockDriverState *bs)
|
||||
+{
|
||||
+ if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ /* Include all nodes that are either in use by a BlockBackend, or that
|
||||
+ * aren't attached to any node, but owned by the monitor. */
|
||||
+ return bdrv_has_blk(bs) || QLIST_EMPTY(&bs->parents);
|
||||
+}
|
||||
|
||||
/* Group operations. All block drivers are involved.
|
||||
* These functions will properly handle dataplane (take aio_context_acquire
|
||||
@@ -399,7 +410,7 @@ bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs)
|
||||
AioContext *ctx = bdrv_get_aio_context(bs);
|
||||
|
||||
aio_context_acquire(ctx);
|
||||
- if (bdrv_is_inserted(bs) && !bdrv_is_read_only(bs)) {
|
||||
+ if (bdrv_all_snapshots_includes_bs(bs)) {
|
||||
ok = bdrv_can_snapshot(bs);
|
||||
}
|
||||
aio_context_release(ctx);
|
||||
@@ -426,8 +437,9 @@ int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
|
||||
AioContext *ctx = bdrv_get_aio_context(bs);
|
||||
|
||||
aio_context_acquire(ctx);
|
||||
- if (bdrv_can_snapshot(bs) &&
|
||||
- bdrv_snapshot_find(bs, snapshot, name) >= 0) {
|
||||
+ if (bdrv_all_snapshots_includes_bs(bs) &&
|
||||
+ bdrv_snapshot_find(bs, snapshot, name) >= 0)
|
||||
+ {
|
||||
ret = bdrv_snapshot_delete(bs, snapshot->id_str,
|
||||
snapshot->name, err);
|
||||
}
|
||||
@@ -455,7 +467,7 @@ int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs,
|
||||
AioContext *ctx = bdrv_get_aio_context(bs);
|
||||
|
||||
aio_context_acquire(ctx);
|
||||
- if (bdrv_can_snapshot(bs)) {
|
||||
+ if (bdrv_all_snapshots_includes_bs(bs)) {
|
||||
ret = bdrv_snapshot_goto(bs, name, errp);
|
||||
}
|
||||
aio_context_release(ctx);
|
||||
@@ -481,7 +493,7 @@ int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs)
|
||||
AioContext *ctx = bdrv_get_aio_context(bs);
|
||||
|
||||
aio_context_acquire(ctx);
|
||||
- if (bdrv_can_snapshot(bs)) {
|
||||
+ if (bdrv_all_snapshots_includes_bs(bs)) {
|
||||
err = bdrv_snapshot_find(bs, &sn, name);
|
||||
}
|
||||
aio_context_release(ctx);
|
||||
@@ -512,7 +524,7 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
|
||||
if (bs == vm_state_bs) {
|
||||
sn->vm_state_size = vm_state_size;
|
||||
err = bdrv_snapshot_create(bs, sn);
|
||||
- } else if (bdrv_can_snapshot(bs)) {
|
||||
+ } else if (bdrv_all_snapshots_includes_bs(bs)) {
|
||||
sn->vm_state_size = 0;
|
||||
err = bdrv_snapshot_create(bs, sn);
|
||||
}
|
||||
@@ -538,7 +550,7 @@ BlockDriverState *bdrv_all_find_vmstate_bs(void)
|
||||
bool found;
|
||||
|
||||
aio_context_acquire(ctx);
|
||||
- found = bdrv_can_snapshot(bs);
|
||||
+ found = bdrv_all_snapshots_includes_bs(bs) && bdrv_can_snapshot(bs);
|
||||
aio_context_release(ctx);
|
||||
|
||||
if (found) {
|
||||
--
|
||||
2.23.0
|
||||
61
blockjob-update-nodes-head-while-removing-all-bdrv.patch
Normal file
61
blockjob-update-nodes-head-while-removing-all-bdrv.patch
Normal file
@ -0,0 +1,61 @@
|
||||
From 86b0f4022bb43b16979ba5300e8d40a1e6d44b79 Mon Sep 17 00:00:00 2001
|
||||
From: Sergio Lopez <slp@redhat.com>
|
||||
Date: Wed, 11 Sep 2019 12:03:16 +0200
|
||||
Subject: [PATCH] blockjob: update nodes head while removing all bdrv
|
||||
|
||||
block_job_remove_all_bdrv() iterates through job->nodes, calling
|
||||
bdrv_root_unref_child() for each entry. The call to the latter may
|
||||
reach child_job_[can_]set_aio_ctx(), which will also attempt to
|
||||
traverse job->nodes, potentially finding entries that were freed
|
||||
on previous iterations.
|
||||
|
||||
To avoid this situation, update job->nodes head on each iteration to
|
||||
ensure that already freed entries are no longer linked to the list.
|
||||
|
||||
RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1746631
|
||||
Signed-off-by: Sergio Lopez <slp@redhat.com>
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Max Reitz <mreitz@redhat.com>
|
||||
Message-id: 20190911100316.32282-1-mreitz@redhat.com
|
||||
Reviewed-by: Sergio Lopez <slp@redhat.com>
|
||||
Signed-off-by: Max Reitz <mreitz@redhat.com>
|
||||
(cherry picked from commit d876bf676f5e7c6aa9ac64555e48cba8734ecb2f)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
blockjob.c | 17 +++++++++++++----
|
||||
1 file changed, 13 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/blockjob.c b/blockjob.c
|
||||
index 20b7f557da..74abb97bfd 100644
|
||||
--- a/blockjob.c
|
||||
+++ b/blockjob.c
|
||||
@@ -186,14 +186,23 @@ static const BdrvChildRole child_job = {
|
||||
|
||||
void block_job_remove_all_bdrv(BlockJob *job)
|
||||
{
|
||||
- GSList *l;
|
||||
- for (l = job->nodes; l; l = l->next) {
|
||||
+ /*
|
||||
+ * bdrv_root_unref_child() may reach child_job_[can_]set_aio_ctx(),
|
||||
+ * which will also traverse job->nodes, so consume the list one by
|
||||
+ * one to make sure that such a concurrent access does not attempt
|
||||
+ * to process an already freed BdrvChild.
|
||||
+ */
|
||||
+ while (job->nodes) {
|
||||
+ GSList *l = job->nodes;
|
||||
BdrvChild *c = l->data;
|
||||
+
|
||||
+ job->nodes = l->next;
|
||||
+
|
||||
bdrv_op_unblock_all(c->bs, job->blocker);
|
||||
bdrv_root_unref_child(c);
|
||||
+
|
||||
+ g_slist_free_1(l);
|
||||
}
|
||||
- g_slist_free(job->nodes);
|
||||
- job->nodes = NULL;
|
||||
}
|
||||
|
||||
bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs)
|
||||
--
|
||||
2.23.0
|
||||
50
coroutine-Add-qemu_co_mutex_assert_locked.patch
Normal file
50
coroutine-Add-qemu_co_mutex_assert_locked.patch
Normal file
@ -0,0 +1,50 @@
|
||||
From e9bb3d942e268a19e03fc5d404586d2ed1564282 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu, 24 Oct 2019 16:26:57 +0200
|
||||
Subject: [PATCH] coroutine: Add qemu_co_mutex_assert_locked()
|
||||
|
||||
Some functions require that the caller holds a certain CoMutex for them
|
||||
to operate correctly. Add a function so that they can assert the lock is
|
||||
really held.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Tested-by: Michael Weiser <michael.weiser@gmx.de>
|
||||
Reviewed-by: Michael Weiser <michael.weiser@gmx.de>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
|
||||
Reviewed-by: Denis V. Lunev <den@openvz.org>
|
||||
Reviewed-by: Max Reitz <mreitz@redhat.com>
|
||||
(cherry picked from commit 944f3d5dd216fcd8cb007eddd4f82dced0a15b3d)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
include/qemu/coroutine.h | 15 +++++++++++++++
|
||||
1 file changed, 15 insertions(+)
|
||||
|
||||
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
|
||||
index 9801e7f5a4..f4843b5f59 100644
|
||||
--- a/include/qemu/coroutine.h
|
||||
+++ b/include/qemu/coroutine.h
|
||||
@@ -167,6 +167,21 @@ void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex);
|
||||
*/
|
||||
void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex);
|
||||
|
||||
+/**
|
||||
+ * Assert that the current coroutine holds @mutex.
|
||||
+ */
|
||||
+static inline coroutine_fn void qemu_co_mutex_assert_locked(CoMutex *mutex)
|
||||
+{
|
||||
+ /*
|
||||
+ * mutex->holder doesn't need any synchronisation if the assertion holds
|
||||
+ * true because the mutex protects it. If it doesn't hold true, we still
|
||||
+ * don't mind if another thread takes or releases mutex behind our back,
|
||||
+ * because the condition will be false no matter whether we read NULL or
|
||||
+ * the pointer for any other coroutine.
|
||||
+ */
|
||||
+ assert(atomic_read(&mutex->locked) &&
|
||||
+ mutex->holder == qemu_coroutine_self());
|
||||
+}
|
||||
|
||||
/**
|
||||
* CoQueues are a mechanism to queue coroutines in order to continue executing
|
||||
--
|
||||
2.23.0
|
||||
79
dma-helpers-ensure-AIO-callback-is-invoked-after-can.patch
Normal file
79
dma-helpers-ensure-AIO-callback-is-invoked-after-can.patch
Normal file
@ -0,0 +1,79 @@
|
||||
From fbde196c30e4797a51bda046ba514b187963d4ba Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Mon, 29 Jul 2019 23:34:16 +0200
|
||||
Subject: [PATCH] dma-helpers: ensure AIO callback is invoked after
|
||||
cancellation
|
||||
|
||||
dma_aio_cancel unschedules the BH if there is one, which corresponds
|
||||
to the reschedule_dma case of dma_blk_cb. This can stall the DMA
|
||||
permanently, because dma_complete will never get invoked and therefore
|
||||
nobody will ever invoke the original AIO callback in dbs->common.cb.
|
||||
|
||||
Fix this by invoking the callback (which is ensured to happen after
|
||||
a bdrv_aio_cancel_async, or done manually in the dbs->bh case), and
|
||||
add assertions to check that the DMA state machine is indeed waiting
|
||||
for dma_complete or reschedule_dma, but never both.
|
||||
|
||||
Reported-by: John Snow <jsnow@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Message-id: 20190729213416.1972-1-pbonzini@redhat.com
|
||||
Signed-off-by: John Snow <jsnow@redhat.com>
|
||||
(cherry picked from commit 539343c0a47e19d5dd64d846d64d084d9793681f)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
dma-helpers.c | 13 +++++++++----
|
||||
1 file changed, 9 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/dma-helpers.c b/dma-helpers.c
|
||||
index 2d7e02d35e..d3871dc61e 100644
|
||||
--- a/dma-helpers.c
|
||||
+++ b/dma-helpers.c
|
||||
@@ -90,6 +90,7 @@ static void reschedule_dma(void *opaque)
|
||||
{
|
||||
DMAAIOCB *dbs = (DMAAIOCB *)opaque;
|
||||
|
||||
+ assert(!dbs->acb && dbs->bh);
|
||||
qemu_bh_delete(dbs->bh);
|
||||
dbs->bh = NULL;
|
||||
dma_blk_cb(dbs, 0);
|
||||
@@ -111,15 +112,12 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
|
||||
{
|
||||
trace_dma_complete(dbs, ret, dbs->common.cb);
|
||||
|
||||
+ assert(!dbs->acb && !dbs->bh);
|
||||
dma_blk_unmap(dbs);
|
||||
if (dbs->common.cb) {
|
||||
dbs->common.cb(dbs->common.opaque, ret);
|
||||
}
|
||||
qemu_iovec_destroy(&dbs->iov);
|
||||
- if (dbs->bh) {
|
||||
- qemu_bh_delete(dbs->bh);
|
||||
- dbs->bh = NULL;
|
||||
- }
|
||||
qemu_aio_unref(dbs);
|
||||
}
|
||||
|
||||
@@ -179,14 +177,21 @@ static void dma_aio_cancel(BlockAIOCB *acb)
|
||||
|
||||
trace_dma_aio_cancel(dbs);
|
||||
|
||||
+ assert(!(dbs->acb && dbs->bh));
|
||||
if (dbs->acb) {
|
||||
+ /* This will invoke dma_blk_cb. */
|
||||
blk_aio_cancel_async(dbs->acb);
|
||||
+ return;
|
||||
}
|
||||
+
|
||||
if (dbs->bh) {
|
||||
cpu_unregister_map_client(dbs->bh);
|
||||
qemu_bh_delete(dbs->bh);
|
||||
dbs->bh = NULL;
|
||||
}
|
||||
+ if (dbs->common.cb) {
|
||||
+ dbs->common.cb(dbs->common.opaque, -ECANCELED);
|
||||
+ }
|
||||
}
|
||||
|
||||
static AioContext *dma_get_aio_context(BlockAIOCB *acb)
|
||||
--
|
||||
2.23.0
|
||||
50
hbitmap-handle-set-reset-with-zero-length.patch
Normal file
50
hbitmap-handle-set-reset-with-zero-length.patch
Normal file
@ -0,0 +1,50 @@
|
||||
From c0b35d87de345bd3b59a44c604b247a0497f2fc0 Mon Sep 17 00:00:00 2001
|
||||
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
|
||||
Date: Fri, 11 Oct 2019 12:07:07 +0300
|
||||
Subject: [PATCH] hbitmap: handle set/reset with zero length
|
||||
|
||||
Passing zero length to these functions leads to unpredictable results.
|
||||
Zero-length set/reset may occur in active-mirror, on zero-length write
|
||||
(which is unlikely, but not guaranteed to never happen).
|
||||
|
||||
Let's just do nothing on zero-length request.
|
||||
|
||||
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
|
||||
Message-id: 20191011090711.19940-2-vsementsov@virtuozzo.com
|
||||
Reviewed-by: Max Reitz <mreitz@redhat.com>
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Max Reitz <mreitz@redhat.com>
|
||||
(cherry picked from commit fed33bd175f663cc8c13f8a490a4f35a19756cfe)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
util/hbitmap.c | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
diff --git a/util/hbitmap.c b/util/hbitmap.c
|
||||
index 71c6ba2c52..c059313b9e 100644
|
||||
--- a/util/hbitmap.c
|
||||
+++ b/util/hbitmap.c
|
||||
@@ -387,6 +387,10 @@ void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count)
|
||||
uint64_t first, n;
|
||||
uint64_t last = start + count - 1;
|
||||
|
||||
+ if (count == 0) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
trace_hbitmap_set(hb, start, count,
|
||||
start >> hb->granularity, last >> hb->granularity);
|
||||
|
||||
@@ -478,6 +482,10 @@ void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count)
|
||||
uint64_t last = start + count - 1;
|
||||
uint64_t gran = 1ULL << hb->granularity;
|
||||
|
||||
+ if (count == 0) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
assert(QEMU_IS_ALIGNED(start, gran));
|
||||
assert(QEMU_IS_ALIGNED(count, gran) || (start + count == hb->orig_size));
|
||||
|
||||
--
|
||||
2.23.0
|
||||
47
hw-arm-boot.c-Set-NSACR.-CP11-CP10-for-NS-kernel-boo.patch
Normal file
47
hw-arm-boot.c-Set-NSACR.-CP11-CP10-for-NS-kernel-boo.patch
Normal file
@ -0,0 +1,47 @@
|
||||
From 220816989c1e3d490d293b8d7ac85dbc41a4c321 Mon Sep 17 00:00:00 2001
|
||||
From: Peter Maydell <peter.maydell@linaro.org>
|
||||
Date: Fri, 20 Sep 2019 18:40:39 +0100
|
||||
Subject: [PATCH] hw/arm/boot.c: Set NSACR.{CP11,CP10} for NS kernel boots
|
||||
|
||||
If we're booting a Linux kernel directly into Non-Secure
|
||||
state on a CPU which has Secure state, then make sure we
|
||||
set the NSACR CP11 and CP10 bits, so that Non-Secure is allowed
|
||||
to access the FPU. Otherwise an AArch32 kernel will UNDEF as
|
||||
soon as it tries to use the FPU.
|
||||
|
||||
It used to not matter that we didn't do this until commit
|
||||
fc1120a7f5f2d4b6, where we implemented actually honouring
|
||||
these NSACR bits.
|
||||
|
||||
The problem only exists for CPUs where EL3 is AArch32; the
|
||||
equivalent AArch64 trap bits are in CPTR_EL3 and are "0 to
|
||||
not trap, 1 to trap", so the reset value of the register
|
||||
permits NS access, unlike NSACR.
|
||||
|
||||
Fixes: fc1120a7f5
|
||||
Fixes: https://bugs.launchpad.net/qemu/+bug/1844597
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Message-id: 20190920174039.3916-1-peter.maydell@linaro.org
|
||||
(cherry picked from commit ece628fcf69cbbd4b3efb6fbd203af07609467a2)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
hw/arm/boot.c | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
|
||||
index c2b89b3bb9..fc4e021a38 100644
|
||||
--- a/hw/arm/boot.c
|
||||
+++ b/hw/arm/boot.c
|
||||
@@ -754,6 +754,8 @@ static void do_cpu_reset(void *opaque)
|
||||
(cs != first_cpu || !info->secure_board_setup)) {
|
||||
/* Linux expects non-secure state */
|
||||
env->cp15.scr_el3 |= SCR_NS;
|
||||
+ /* Set NSACR.{CP11,CP10} so NS can access the FPU */
|
||||
+ env->cp15.nsacr |= 3 << 10;
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.23.0
|
||||
45
hw-core-loader-Fix-possible-crash-in-rom_copy.patch
Normal file
45
hw-core-loader-Fix-possible-crash-in-rom_copy.patch
Normal file
@ -0,0 +1,45 @@
|
||||
From aae0faa5d3bee91c66dc4c1543190f55a242771e Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Wed, 25 Sep 2019 14:16:43 +0200
|
||||
Subject: [PATCH] hw/core/loader: Fix possible crash in rom_copy()
|
||||
|
||||
Both, "rom->addr" and "addr" are derived from the binary image
|
||||
that can be loaded with the "-kernel" parameter. The code in
|
||||
rom_copy() then calculates:
|
||||
|
||||
d = dest + (rom->addr - addr);
|
||||
|
||||
and uses "d" as destination in a memcpy() some lines later. Now with
|
||||
bad kernel images, it is possible that rom->addr is smaller than addr,
|
||||
thus "rom->addr - addr" gets negative and the memcpy() then tries to
|
||||
copy contents from the image to a bad memory location. This could
|
||||
maybe be used to inject code from a kernel image into the QEMU binary,
|
||||
so we better fix it with an additional sanity check here.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Reported-by: Guangming Liu
|
||||
Buglink: https://bugs.launchpad.net/qemu/+bug/1844635
|
||||
Message-Id: <20190925130331.27825-1-thuth@redhat.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
(cherry picked from commit e423455c4f23a1a828901c78fe6d03b7dde79319)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
hw/core/loader.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hw/core/loader.c b/hw/core/loader.c
|
||||
index 425bf69a99..838a34174a 100644
|
||||
--- a/hw/core/loader.c
|
||||
+++ b/hw/core/loader.c
|
||||
@@ -1242,7 +1242,7 @@ int rom_copy(uint8_t *dest, hwaddr addr, size_t size)
|
||||
if (rom->addr + rom->romsize < addr) {
|
||||
continue;
|
||||
}
|
||||
- if (rom->addr > end) {
|
||||
+ if (rom->addr > end || rom->addr < addr) {
|
||||
break;
|
||||
}
|
||||
|
||||
--
|
||||
2.23.0
|
||||
42
libvhost-user-fix-SLAVE_SEND_FD-handling.patch
Normal file
42
libvhost-user-fix-SLAVE_SEND_FD-handling.patch
Normal file
@ -0,0 +1,42 @@
|
||||
From 28a9a3558a427493049723fff390add7026653eb Mon Sep 17 00:00:00 2001
|
||||
From: Johannes Berg <johannes.berg@intel.com>
|
||||
Date: Tue, 3 Sep 2019 23:04:22 +0300
|
||||
Subject: [PATCH] libvhost-user: fix SLAVE_SEND_FD handling
|
||||
|
||||
It doesn't look like this could possibly work properly since
|
||||
VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD is defined to 10, but the
|
||||
dev->protocol_features has a bitmap. I suppose the peer this
|
||||
was tested with also supported VHOST_USER_PROTOCOL_F_LOG_SHMFD,
|
||||
in which case the test would always be false, but nevertheless
|
||||
the code seems wrong.
|
||||
|
||||
Use has_feature() to fix this.
|
||||
|
||||
Fixes: d84599f56c82 ("libvhost-user: support host notifier")
|
||||
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
|
||||
Message-Id: <20190903200422.11693-1-johannes@sipsolutions.net>
|
||||
Reviewed-by: Tiwei Bie <tiwei.bie@intel.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
(cherry picked from commit 8726b70b449896f1211f869ec4f608904f027207)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
contrib/libvhost-user/libvhost-user.c | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
|
||||
index 4b36e35a82..cb5f5770e4 100644
|
||||
--- a/contrib/libvhost-user/libvhost-user.c
|
||||
+++ b/contrib/libvhost-user/libvhost-user.c
|
||||
@@ -1097,7 +1097,8 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd,
|
||||
|
||||
vmsg.fd_num = fd_num;
|
||||
|
||||
- if ((dev->protocol_features & VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) == 0) {
|
||||
+ if (!has_feature(dev->protocol_features,
|
||||
+ VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
--
|
||||
2.23.0
|
||||
60
make-release-pull-in-edk2-submodules-so-we-can-build.patch
Normal file
60
make-release-pull-in-edk2-submodules-so-we-can-build.patch
Normal file
@ -0,0 +1,60 @@
|
||||
From c5c9b1362d1652a9d0f79f6d9ae2f80d4b5fe432 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
Date: Thu, 12 Sep 2019 18:12:01 -0500
|
||||
Subject: [PATCH] make-release: pull in edk2 submodules so we can build it from
|
||||
tarballs
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
The `make efi` target added by 536d2173 is built from the roms/edk2
|
||||
submodule, which in turn relies on additional submodules nested under
|
||||
roms/edk2.
|
||||
|
||||
The make-release script currently only pulls in top-level submodules,
|
||||
so these nested submodules are missing in the resulting tarball.
|
||||
|
||||
We could try to address this situation more generally by recursively
|
||||
pulling in all submodules, but this doesn't necessarily ensure the
|
||||
end-result will build properly (this case also required other changes).
|
||||
|
||||
Additionally, due to the nature of submodules, we may not always have
|
||||
control over how these sorts of things are dealt with, so for now we
|
||||
continue to handle it on a case-by-case basis in the make-release script.
|
||||
|
||||
Cc: Laszlo Ersek <lersek@redhat.com>
|
||||
Cc: Bruce Rogers <brogers@suse.com>
|
||||
Cc: qemu-stable@nongnu.org # v4.1.0
|
||||
Reported-by: Bruce Rogers <brogers@suse.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
|
||||
Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com>
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
Message-Id: <20190912231202.12327-2-mdroth@linux.vnet.ibm.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
|
||||
(cherry picked from commit 45c61c6c23918e3b05ed9ecac5b2328ebae5f774)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
scripts/make-release | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
diff --git a/scripts/make-release b/scripts/make-release
|
||||
index b4af9c9e52..a2a8cda33c 100755
|
||||
--- a/scripts/make-release
|
||||
+++ b/scripts/make-release
|
||||
@@ -20,6 +20,14 @@ git checkout "v${version}"
|
||||
git submodule update --init
|
||||
(cd roms/seabios && git describe --tags --long --dirty > .version)
|
||||
(cd roms/skiboot && ./make_version.sh > .version)
|
||||
+# Fetch edk2 submodule's submodules, since it won't have access to them via
|
||||
+# the tarball later.
|
||||
+#
|
||||
+# A more uniform way to handle this sort of situation would be nice, but we
|
||||
+# don't necessarily have much control over how a submodule handles its
|
||||
+# submodule dependencies, so we continue to handle these on a case-by-case
|
||||
+# basis for now.
|
||||
+(cd roms/edk2 && git submodule update --init)
|
||||
popd
|
||||
tar --exclude=.git -cjf ${destination}.tar.bz2 ${destination}
|
||||
rm -rf ${destination}
|
||||
--
|
||||
2.23.0
|
||||
52
mirror-Keep-mirror_top_bs-drained-after-dropping-per.patch
Normal file
52
mirror-Keep-mirror_top_bs-drained-after-dropping-per.patch
Normal file
@ -0,0 +1,52 @@
|
||||
From e092a17d3825a8f2c93cb429aaa5d857b579b64c Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Mon, 22 Jul 2019 17:44:27 +0200
|
||||
Subject: [PATCH] mirror: Keep mirror_top_bs drained after dropping permissions
|
||||
|
||||
mirror_top_bs is currently implicitly drained through its connection to
|
||||
the source or the target node. However, the drain section for target_bs
|
||||
ends early after moving mirror_top_bs from src to target_bs, so that
|
||||
requests can already be restarted while mirror_top_bs is still present
|
||||
in the chain, but has dropped all permissions and therefore runs into an
|
||||
assertion failure like this:
|
||||
|
||||
qemu-system-x86_64: block/io.c:1634: bdrv_co_write_req_prepare:
|
||||
Assertion `child->perm & BLK_PERM_WRITE' failed.
|
||||
|
||||
Keep mirror_top_bs drained until all graph changes have completed.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Max Reitz <mreitz@redhat.com>
|
||||
(cherry picked from commit d2da5e288a2e71e82866c8fdefd41b5727300124)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
block/mirror.c | 6 +++++-
|
||||
1 file changed, 5 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/block/mirror.c b/block/mirror.c
|
||||
index 0e3f7923cf..681b305de6 100644
|
||||
--- a/block/mirror.c
|
||||
+++ b/block/mirror.c
|
||||
@@ -661,7 +661,10 @@ static int mirror_exit_common(Job *job)
|
||||
s->target = NULL;
|
||||
|
||||
/* We don't access the source any more. Dropping any WRITE/RESIZE is
|
||||
- * required before it could become a backing file of target_bs. */
|
||||
+ * required before it could become a backing file of target_bs. Not having
|
||||
+ * these permissions any more means that we can't allow any new requests on
|
||||
+ * mirror_top_bs from now on, so keep it drained. */
|
||||
+ bdrv_drained_begin(mirror_top_bs);
|
||||
bs_opaque->stop = true;
|
||||
bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
|
||||
&error_abort);
|
||||
@@ -729,6 +732,7 @@ static int mirror_exit_common(Job *job)
|
||||
bs_opaque->job = NULL;
|
||||
|
||||
bdrv_drained_end(src);
|
||||
+ bdrv_drained_end(mirror_top_bs);
|
||||
s->in_drain = false;
|
||||
bdrv_unref(mirror_top_bs);
|
||||
bdrv_unref(src);
|
||||
--
|
||||
2.23.0
|
||||
102
pc-Don-t-make-die-id-mandatory-unless-necessary.patch
Normal file
102
pc-Don-t-make-die-id-mandatory-unless-necessary.patch
Normal file
@ -0,0 +1,102 @@
|
||||
From 7ebcd375ade505358c1c45542de22f188c599bdd Mon Sep 17 00:00:00 2001
|
||||
From: Eduardo Habkost <ehabkost@redhat.com>
|
||||
Date: Fri, 16 Aug 2019 14:07:50 -0300
|
||||
Subject: [PATCH] pc: Don't make die-id mandatory unless necessary
|
||||
|
||||
We have this issue reported when using libvirt to hotplug CPUs:
|
||||
https://bugzilla.redhat.com/show_bug.cgi?id=1741451
|
||||
|
||||
Basically, libvirt is not copying die-id from
|
||||
query-hotpluggable-cpus, but die-id is now mandatory.
|
||||
|
||||
We could blame libvirt and say it is not following the documented
|
||||
interface, because we have this buried in the QAPI schema
|
||||
documentation:
|
||||
|
||||
> Note: currently there are 5 properties that could be present
|
||||
> but management should be prepared to pass through other
|
||||
> properties with device_add command to allow for future
|
||||
> interface extension. This also requires the filed names to be kept in
|
||||
> sync with the properties passed to -device/device_add.
|
||||
|
||||
But I don't think this would be reasonable of us. We can just
|
||||
make QEMU more flexible and let die-id be omitted when there's
|
||||
no ambiguity. This will allow us to keep compatibility with
|
||||
existing libvirt versions.
|
||||
|
||||
Test case included to ensure we don't break this again.
|
||||
|
||||
Fixes: commit 176d2cda0dee ("i386/cpu: Consolidate die-id validity in smp context")
|
||||
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
|
||||
Message-Id: <20190816170750.23910-1-ehabkost@redhat.com>
|
||||
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
|
||||
(cherry picked from commit fea374e7c8079563bca7c8fac895c6a880f76adc)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
hw/i386/pc.c | 8 ++++++
|
||||
tests/acceptance/pc_cpu_hotplug_props.py | 35 ++++++++++++++++++++++++
|
||||
2 files changed, 43 insertions(+)
|
||||
create mode 100644 tests/acceptance/pc_cpu_hotplug_props.py
|
||||
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index 549c437050..947f81070f 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -2403,6 +2403,14 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev,
|
||||
int max_socket = (ms->smp.max_cpus - 1) /
|
||||
smp_threads / smp_cores / pcms->smp_dies;
|
||||
|
||||
+ /*
|
||||
+ * die-id was optional in QEMU 4.0 and older, so keep it optional
|
||||
+ * if there's only one die per socket.
|
||||
+ */
|
||||
+ if (cpu->die_id < 0 && pcms->smp_dies == 1) {
|
||||
+ cpu->die_id = 0;
|
||||
+ }
|
||||
+
|
||||
if (cpu->socket_id < 0) {
|
||||
error_setg(errp, "CPU socket-id is not set");
|
||||
return;
|
||||
diff --git a/tests/acceptance/pc_cpu_hotplug_props.py b/tests/acceptance/pc_cpu_hotplug_props.py
|
||||
new file mode 100644
|
||||
index 0000000000..08b7e632c6
|
||||
--- /dev/null
|
||||
+++ b/tests/acceptance/pc_cpu_hotplug_props.py
|
||||
@@ -0,0 +1,35 @@
|
||||
+#
|
||||
+# Ensure CPU die-id can be omitted on -device
|
||||
+#
|
||||
+# Copyright (c) 2019 Red Hat Inc
|
||||
+#
|
||||
+# Author:
|
||||
+# Eduardo Habkost <ehabkost@redhat.com>
|
||||
+#
|
||||
+# This library is free software; you can redistribute it and/or
|
||||
+# modify it under the terms of the GNU Lesser General Public
|
||||
+# License as published by the Free Software Foundation; either
|
||||
+# version 2 of the License, or (at your option) any later version.
|
||||
+#
|
||||
+# This library is distributed in the hope that it will be useful,
|
||||
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+# Lesser General Public License for more details.
|
||||
+#
|
||||
+# You should have received a copy of the GNU Lesser General Public
|
||||
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
+#
|
||||
+
|
||||
+from avocado_qemu import Test
|
||||
+
|
||||
+class OmittedCPUProps(Test):
|
||||
+ """
|
||||
+ :avocado: tags=arch:x86_64
|
||||
+ """
|
||||
+ def test_no_die_id(self):
|
||||
+ self.vm.add_args('-nodefaults', '-S')
|
||||
+ self.vm.add_args('-smp', '1,sockets=2,cores=2,threads=2,maxcpus=8')
|
||||
+ self.vm.add_args('-cpu', 'qemu64')
|
||||
+ self.vm.add_args('-device', 'qemu64-x86_64-cpu,socket-id=1,core-id=0,thread-id=0')
|
||||
+ self.vm.launch()
|
||||
+ self.assertEquals(len(self.vm.command('query-cpus')), 2)
|
||||
--
|
||||
2.23.0
|
||||
39
pr-manager-Fix-invalid-g_free-crash-bug.patch
Normal file
39
pr-manager-Fix-invalid-g_free-crash-bug.patch
Normal file
@ -0,0 +1,39 @@
|
||||
From 57fdf4a13ff16d9d48a43f02a5e7b42e3d264f83 Mon Sep 17 00:00:00 2001
|
||||
From: Markus Armbruster <armbru@redhat.com>
|
||||
Date: Thu, 22 Aug 2019 15:38:46 +0200
|
||||
Subject: [PATCH] pr-manager: Fix invalid g_free() crash bug
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
pr_manager_worker() passes its @opaque argument to g_free(). Wrong;
|
||||
it points to pr_manager_worker()'s automatic @data. Broken when
|
||||
commit 2f3a7ab39be converted @data from heap- to stack-allocated. Fix
|
||||
by deleting the g_free().
|
||||
|
||||
Fixes: 2f3a7ab39bec4ba8022dc4d42ea641165b004e3e
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Markus Armbruster <armbru@redhat.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
|
||||
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 6b9d62c2a9e83bbad73fb61406f0ff69b46ff6f3)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
scsi/pr-manager.c | 1 -
|
||||
1 file changed, 1 deletion(-)
|
||||
|
||||
diff --git a/scsi/pr-manager.c b/scsi/pr-manager.c
|
||||
index ee43663576..0c866e8698 100644
|
||||
--- a/scsi/pr-manager.c
|
||||
+++ b/scsi/pr-manager.c
|
||||
@@ -39,7 +39,6 @@ static int pr_manager_worker(void *opaque)
|
||||
int fd = data->fd;
|
||||
int r;
|
||||
|
||||
- g_free(data);
|
||||
trace_pr_manager_run(fd, hdr->cmdp[0], hdr->cmdp[1]);
|
||||
|
||||
/* The reference was taken in pr_manager_execute. */
|
||||
--
|
||||
2.23.0
|
||||
35
qcow2-Fix-QCOW2_COMPRESSED_SECTOR_MASK.patch
Normal file
35
qcow2-Fix-QCOW2_COMPRESSED_SECTOR_MASK.patch
Normal file
@ -0,0 +1,35 @@
|
||||
From 405deba14f6b61b9c557484b46e863308c8cf373 Mon Sep 17 00:00:00 2001
|
||||
From: Max Reitz <mreitz@redhat.com>
|
||||
Date: Mon, 28 Oct 2019 17:18:40 +0100
|
||||
Subject: [PATCH] qcow2: Fix QCOW2_COMPRESSED_SECTOR_MASK
|
||||
|
||||
Masks for L2 table entries should have 64 bit.
|
||||
|
||||
Fixes: b6c246942b14d3e0dec46a6c5868ed84e7dbea19
|
||||
Buglink: https://bugs.launchpad.net/qemu/+bug/1850000
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Max Reitz <mreitz@redhat.com>
|
||||
Message-id: 20191028161841.1198-2-mreitz@redhat.com
|
||||
Reviewed-by: Alberto Garcia <berto@igalia.com>
|
||||
Signed-off-by: Max Reitz <mreitz@redhat.com>
|
||||
(cherry picked from commit 24552feb6ae2f615b76c2b95394af43901f75046)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
block/qcow2.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/block/qcow2.h b/block/qcow2.h
|
||||
index fc1b0d3c1e..359197f89f 100644
|
||||
--- a/block/qcow2.h
|
||||
+++ b/block/qcow2.h
|
||||
@@ -77,7 +77,7 @@
|
||||
|
||||
/* Defined in the qcow2 spec (compressed cluster descriptor) */
|
||||
#define QCOW2_COMPRESSED_SECTOR_SIZE 512U
|
||||
-#define QCOW2_COMPRESSED_SECTOR_MASK (~(QCOW2_COMPRESSED_SECTOR_SIZE - 1))
|
||||
+#define QCOW2_COMPRESSED_SECTOR_MASK (~(QCOW2_COMPRESSED_SECTOR_SIZE - 1ULL))
|
||||
|
||||
/* Must be at least 2 to cover COW */
|
||||
#define MIN_L2_CACHE_SIZE 2 /* cache entries */
|
||||
--
|
||||
2.23.0
|
||||
71
qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch
Normal file
71
qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch
Normal file
@ -0,0 +1,71 @@
|
||||
From 416a692e51b8b582407e30046ddcffbbe52ecf77 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu, 24 Oct 2019 16:26:58 +0200
|
||||
Subject: [PATCH] qcow2: Fix corruption bug in
|
||||
qcow2_detect_metadata_preallocation()
|
||||
|
||||
qcow2_detect_metadata_preallocation() calls qcow2_get_refcount() which
|
||||
requires s->lock to be taken to protect its accesses to the refcount
|
||||
table and refcount blocks. However, nothing in this code path actually
|
||||
took the lock. This could cause the same cache entry to be used by two
|
||||
requests at the same time, for different tables at different offsets,
|
||||
resulting in image corruption.
|
||||
|
||||
As it would be preferable to base the detection on consistent data (even
|
||||
though it's just heuristics), let's take the lock not only around the
|
||||
qcow2_get_refcount() calls, but around the whole function.
|
||||
|
||||
This patch takes the lock in qcow2_co_block_status() earlier and asserts
|
||||
in qcow2_detect_metadata_preallocation() that we hold the lock.
|
||||
|
||||
Fixes: 69f47505ee66afaa513305de0c1895a224e52c45
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Reported-by: Michael Weiser <michael.weiser@gmx.de>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Tested-by: Michael Weiser <michael.weiser@gmx.de>
|
||||
Reviewed-by: Michael Weiser <michael.weiser@gmx.de>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
|
||||
Reviewed-by: Max Reitz <mreitz@redhat.com>
|
||||
(cherry picked from commit 5e9785505210e2477e590e61b1ab100d0ec22b01)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
block/qcow2-refcount.c | 2 ++
|
||||
block/qcow2.c | 3 ++-
|
||||
2 files changed, 4 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
|
||||
index ef965d7895..0d64bf5a5e 100644
|
||||
--- a/block/qcow2-refcount.c
|
||||
+++ b/block/qcow2-refcount.c
|
||||
@@ -3455,6 +3455,8 @@ int qcow2_detect_metadata_preallocation(BlockDriverState *bs)
|
||||
int64_t i, end_cluster, cluster_count = 0, threshold;
|
||||
int64_t file_length, real_allocation, real_clusters;
|
||||
|
||||
+ qemu_co_mutex_assert_locked(&s->lock);
|
||||
+
|
||||
file_length = bdrv_getlength(bs->file->bs);
|
||||
if (file_length < 0) {
|
||||
return file_length;
|
||||
diff --git a/block/qcow2.c b/block/qcow2.c
|
||||
index 865839682c..c0f5439dc8 100644
|
||||
--- a/block/qcow2.c
|
||||
+++ b/block/qcow2.c
|
||||
@@ -1899,6 +1899,8 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
|
||||
unsigned int bytes;
|
||||
int status = 0;
|
||||
|
||||
+ qemu_co_mutex_lock(&s->lock);
|
||||
+
|
||||
if (!s->metadata_preallocation_checked) {
|
||||
ret = qcow2_detect_metadata_preallocation(bs);
|
||||
s->metadata_preallocation = (ret == 1);
|
||||
@@ -1906,7 +1908,6 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
|
||||
}
|
||||
|
||||
bytes = MIN(INT_MAX, count);
|
||||
- qemu_co_mutex_lock(&s->lock);
|
||||
ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset);
|
||||
qemu_co_mutex_unlock(&s->lock);
|
||||
if (ret < 0) {
|
||||
--
|
||||
2.23.0
|
||||
58
qcow2-Fix-the-calculation-of-the-maximum-L2-cache-si.patch
Normal file
58
qcow2-Fix-the-calculation-of-the-maximum-L2-cache-si.patch
Normal file
@ -0,0 +1,58 @@
|
||||
From c9ffb12754b1575babfef45168b6e1b1af80a95f Mon Sep 17 00:00:00 2001
|
||||
From: Alberto Garcia <berto@igalia.com>
|
||||
Date: Fri, 16 Aug 2019 15:17:42 +0300
|
||||
Subject: [PATCH] qcow2: Fix the calculation of the maximum L2 cache size
|
||||
|
||||
The size of the qcow2 L2 cache defaults to 32 MB, which can be easily
|
||||
larger than the maximum amount of L2 metadata that the image can have.
|
||||
For example: with 64 KB clusters the user would need a qcow2 image
|
||||
with a virtual size of 256 GB in order to have 32 MB of L2 metadata.
|
||||
|
||||
Because of that, since commit b749562d9822d14ef69c9eaa5f85903010b86c30
|
||||
we forbid the L2 cache to become larger than the maximum amount of L2
|
||||
metadata for the image, calculated using this formula:
|
||||
|
||||
uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8);
|
||||
|
||||
The problem with this formula is that the result should be rounded up
|
||||
to the cluster size because an L2 table on disk always takes one full
|
||||
cluster.
|
||||
|
||||
For example, a 1280 MB qcow2 image with 64 KB clusters needs exactly
|
||||
160 KB of L2 metadata, but we need 192 KB on disk (3 clusters) even if
|
||||
the last 32 KB of those are not going to be used.
|
||||
|
||||
However QEMU rounds the numbers down and only creates 2 cache tables
|
||||
(128 KB), which is not enough for the image.
|
||||
|
||||
A quick test doing 4KB random writes on a 1280 MB image gives me
|
||||
around 500 IOPS, while with the correct cache size I get 16K IOPS.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Alberto Garcia <berto@igalia.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit b70d08205b2e4044c529eefc21df2c8ab61b473b)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
block/qcow2.c | 6 +++++-
|
||||
1 file changed, 5 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/block/qcow2.c b/block/qcow2.c
|
||||
index 039bdc2f7e..865839682c 100644
|
||||
--- a/block/qcow2.c
|
||||
+++ b/block/qcow2.c
|
||||
@@ -826,7 +826,11 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
|
||||
bool l2_cache_entry_size_set;
|
||||
int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;
|
||||
uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
|
||||
- uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8);
|
||||
+ uint64_t max_l2_entries = DIV_ROUND_UP(virtual_disk_size, s->cluster_size);
|
||||
+ /* An L2 table is always one cluster in size so the max cache size
|
||||
+ * should be a multiple of the cluster size. */
|
||||
+ uint64_t max_l2_cache = ROUND_UP(max_l2_entries * sizeof(uint64_t),
|
||||
+ s->cluster_size);
|
||||
|
||||
combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE);
|
||||
l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE);
|
||||
--
|
||||
2.23.0
|
||||
43
qemu.spec
43
qemu.spec
@ -61,6 +61,46 @@ Patch0048: pcie-Add-pcie-root-port-fast-plug-unplug-feature.patch
|
||||
Patch0049: pcie-Compat-with-devices-which-do-not-support-Link-W.patch
|
||||
Patch0050: aio-wait-delegate-polling-of-main-AioContext-if-BQL-not-held.patch
|
||||
Patch0051: async-use-explicit-memory-barriers.patch
|
||||
Patch0052: dma-helpers-ensure-AIO-callback-is-invoked-after-can.patch
|
||||
Patch0053: Revert-ide-ahci-Check-for-ECANCELED-in-aio-callbacks.patch
|
||||
Patch0054: pc-Don-t-make-die-id-mandatory-unless-necessary.patch
|
||||
Patch0055: block-file-posix-Reduce-xfsctl-use.patch
|
||||
Patch0056: pr-manager-Fix-invalid-g_free-crash-bug.patch
|
||||
Patch0057: x86-do-not-advertise-die-id-in-query-hotpluggbale-cp.patch
|
||||
Patch0058: vpc-Return-0-from-vpc_co_create-on-success.patch
|
||||
Patch0059: target-arm-Free-TCG-temps-in-trans_VMOV_64_sp.patch
|
||||
Patch0060: target-arm-Don-t-abort-on-M-profile-exception-return.patch
|
||||
Patch0061: libvhost-user-fix-SLAVE_SEND_FD-handling.patch
|
||||
Patch0062: qcow2-Fix-the-calculation-of-the-maximum-L2-cache-si.patch
|
||||
Patch0063: block-nfs-tear-down-aio-before-nfs_close.patch
|
||||
Patch0064: blockjob-update-nodes-head-while-removing-all-bdrv.patch
|
||||
Patch0065: block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch
|
||||
Patch0066: coroutine-Add-qemu_co_mutex_assert_locked.patch
|
||||
Patch0067: qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch
|
||||
Patch0068: hw-arm-boot.c-Set-NSACR.-CP11-CP10-for-NS-kernel-boo.patch
|
||||
Patch0069: make-release-pull-in-edk2-submodules-so-we-can-build.patch
|
||||
Patch0070: roms-Makefile.edk2-don-t-pull-in-submodules-when-bui.patch
|
||||
Patch0071: block-snapshot-Restrict-set-of-snapshot-nodes.patch
|
||||
Patch0072: vhost-user-save-features-if-the-char-dev-is-closed.patch
|
||||
Patch0073: hw-core-loader-Fix-possible-crash-in-rom_copy.patch
|
||||
Patch0074: ui-Fix-hanging-up-Cocoa-display-on-macOS-10.15-Catal.patch
|
||||
Patch0075: virtio-new-post_load-hook.patch
|
||||
Patch0076: virtio-net-prevent-offloads-reset-on-migration.patch
|
||||
Patch0077: util-hbitmap-strict-hbitmap_reset.patch
|
||||
Patch0078: hbitmap-handle-set-reset-with-zero-length.patch
|
||||
Patch0079: target-arm-Allow-reading-flags-from-FPSCR-for-M-prof.patch
|
||||
Patch0080: scsi-lsi-exit-infinite-loop-while-executing-script-C.patch
|
||||
Patch0081: virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch
|
||||
Patch0082: qcow2-Fix-QCOW2_COMPRESSED_SECTOR_MASK.patch
|
||||
Patch0083: util-iov-introduce-qemu_iovec_init_extended.patch
|
||||
Patch0084: util-iov-improve-qemu_iovec_is_zero.patch
|
||||
Patch0085: block-io-refactor-padding.patch
|
||||
Patch0086: block-Make-wait-mark-serialising-requests-public.patch
|
||||
Patch0087: block-Add-bdrv_co_get_self_request.patch
|
||||
Patch0088: block-file-posix-Let-post-EOF-fallocate-serialize.patch
|
||||
Patch0089: block-posix-Always-allocate-the-first-block.patch
|
||||
Patch0090: block-create-Do-not-abort-if-a-block-driver-is-not-a.patch
|
||||
Patch0091: mirror-Keep-mirror_top_bs-drained-after-dropping-per.patch
|
||||
|
||||
|
||||
BuildRequires: flex
|
||||
@ -407,6 +447,9 @@ getent passwd qemu >/dev/null || \
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Fri Apr 17 2020 Huawei Technologies Co., Ltd. <fangying1@huawei.com>
|
||||
- backport patch bundles from qemu stable v4.1.1
|
||||
|
||||
* Thu Apr 16 2020 Huawei Technologies Co., Ltd. <fangying1@huawei.com>
|
||||
- aio-wait: delegate polling of main AioContext if BQL not held
|
||||
- async: use explicit memory barriers
|
||||
|
||||
54
roms-Makefile.edk2-don-t-pull-in-submodules-when-bui.patch
Normal file
54
roms-Makefile.edk2-don-t-pull-in-submodules-when-bui.patch
Normal file
@ -0,0 +1,54 @@
|
||||
From fc5afb1a9230fe21d76bcef527b0d3cee90a2cd3 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
Date: Thu, 12 Sep 2019 18:12:02 -0500
|
||||
Subject: [PATCH] roms/Makefile.edk2: don't pull in submodules when building
|
||||
from tarball
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Currently the `make efi` target pulls submodules nested under the
|
||||
roms/edk2 submodule as dependencies. However, when we attempt to build
|
||||
from a tarball this fails since we are no longer in a git tree.
|
||||
|
||||
A preceding patch will pre-populate these submodules in the tarball,
|
||||
so assume this build dependency is only needed when building from a
|
||||
git tree.
|
||||
|
||||
Cc: Laszlo Ersek <lersek@redhat.com>
|
||||
Cc: Bruce Rogers <brogers@suse.com>
|
||||
Cc: qemu-stable@nongnu.org # v4.1.0
|
||||
Reported-by: Bruce Rogers <brogers@suse.com>
|
||||
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
|
||||
Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com>
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
Message-Id: <20190912231202.12327-3-mdroth@linux.vnet.ibm.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
|
||||
(cherry picked from commit f3e330e3c319160ac04954399b5a10afc965098c)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
roms/Makefile.edk2 | 7 ++++++-
|
||||
1 file changed, 6 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/roms/Makefile.edk2 b/roms/Makefile.edk2
|
||||
index c2f2ff59d5..33a074d3a4 100644
|
||||
--- a/roms/Makefile.edk2
|
||||
+++ b/roms/Makefile.edk2
|
||||
@@ -46,8 +46,13 @@ all: $(foreach flashdev,$(flashdevs),../pc-bios/edk2-$(flashdev).fd.bz2) \
|
||||
# files.
|
||||
.INTERMEDIATE: $(foreach flashdev,$(flashdevs),../pc-bios/edk2-$(flashdev).fd)
|
||||
|
||||
+# Fetch edk2 submodule's submodules. If it is not in a git tree, assume
|
||||
+# we're building from a tarball and that they've already been fetched by
|
||||
+# make-release/tarball scripts.
|
||||
submodules:
|
||||
- cd edk2 && git submodule update --init --force
|
||||
+ if test -d edk2/.git; then \
|
||||
+ cd edk2 && git submodule update --init --force; \
|
||||
+ fi
|
||||
|
||||
# See notes on the ".NOTPARALLEL" target and the "+" indicator in
|
||||
# "tests/uefi-test-tools/Makefile".
|
||||
--
|
||||
2.23.0
|
||||
104
scsi-lsi-exit-infinite-loop-while-executing-script-C.patch
Normal file
104
scsi-lsi-exit-infinite-loop-while-executing-script-C.patch
Normal file
@ -0,0 +1,104 @@
|
||||
From 051c9b3cbcb4beb42a6ed017c2146ec3e7a754fb Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Wed, 14 Aug 2019 17:35:21 +0530
|
||||
Subject: [PATCH] scsi: lsi: exit infinite loop while executing script
|
||||
(CVE-2019-12068)
|
||||
|
||||
When executing script in lsi_execute_script(), the LSI scsi adapter
|
||||
emulator advances 's->dsp' index to read next opcode. This can lead
|
||||
to an infinite loop if the next opcode is empty. Move the existing
|
||||
loop exit after 10k iterations so that it covers no-op opcodes as
|
||||
well.
|
||||
|
||||
Reported-by: Bugs SysSec <bugs-syssec@rub.de>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
(cherry picked from commit de594e47659029316bbf9391efb79da0a1a08e08)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
hw/scsi/lsi53c895a.c | 41 +++++++++++++++++++++++++++--------------
|
||||
1 file changed, 27 insertions(+), 14 deletions(-)
|
||||
|
||||
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
|
||||
index 10468c1ec1..72f7b59ab5 100644
|
||||
--- a/hw/scsi/lsi53c895a.c
|
||||
+++ b/hw/scsi/lsi53c895a.c
|
||||
@@ -185,6 +185,9 @@ static const char *names[] = {
|
||||
/* Flag set if this is a tagged command. */
|
||||
#define LSI_TAG_VALID (1 << 16)
|
||||
|
||||
+/* Maximum instructions to process. */
|
||||
+#define LSI_MAX_INSN 10000
|
||||
+
|
||||
typedef struct lsi_request {
|
||||
SCSIRequest *req;
|
||||
uint32_t tag;
|
||||
@@ -1132,7 +1135,21 @@ static void lsi_execute_script(LSIState *s)
|
||||
|
||||
s->istat1 |= LSI_ISTAT1_SRUN;
|
||||
again:
|
||||
- insn_processed++;
|
||||
+ if (++insn_processed > LSI_MAX_INSN) {
|
||||
+ /* Some windows drivers make the device spin waiting for a memory
|
||||
+ location to change. If we have been executed a lot of code then
|
||||
+ assume this is the case and force an unexpected device disconnect.
|
||||
+ This is apparently sufficient to beat the drivers into submission.
|
||||
+ */
|
||||
+ if (!(s->sien0 & LSI_SIST0_UDC)) {
|
||||
+ qemu_log_mask(LOG_GUEST_ERROR,
|
||||
+ "lsi_scsi: inf. loop with UDC masked");
|
||||
+ }
|
||||
+ lsi_script_scsi_interrupt(s, LSI_SIST0_UDC, 0);
|
||||
+ lsi_disconnect(s);
|
||||
+ trace_lsi_execute_script_stop();
|
||||
+ return;
|
||||
+ }
|
||||
insn = read_dword(s, s->dsp);
|
||||
if (!insn) {
|
||||
/* If we receive an empty opcode increment the DSP by 4 bytes
|
||||
@@ -1569,19 +1586,7 @@ again:
|
||||
}
|
||||
}
|
||||
}
|
||||
- if (insn_processed > 10000 && s->waiting == LSI_NOWAIT) {
|
||||
- /* Some windows drivers make the device spin waiting for a memory
|
||||
- location to change. If we have been executed a lot of code then
|
||||
- assume this is the case and force an unexpected device disconnect.
|
||||
- This is apparently sufficient to beat the drivers into submission.
|
||||
- */
|
||||
- if (!(s->sien0 & LSI_SIST0_UDC)) {
|
||||
- qemu_log_mask(LOG_GUEST_ERROR,
|
||||
- "lsi_scsi: inf. loop with UDC masked");
|
||||
- }
|
||||
- lsi_script_scsi_interrupt(s, LSI_SIST0_UDC, 0);
|
||||
- lsi_disconnect(s);
|
||||
- } else if (s->istat1 & LSI_ISTAT1_SRUN && s->waiting == LSI_NOWAIT) {
|
||||
+ if (s->istat1 & LSI_ISTAT1_SRUN && s->waiting == LSI_NOWAIT) {
|
||||
if (s->dcntl & LSI_DCNTL_SSM) {
|
||||
lsi_script_dma_interrupt(s, LSI_DSTAT_SSI);
|
||||
} else {
|
||||
@@ -1969,6 +1974,10 @@ static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val)
|
||||
case 0x2f: /* DSP[24:31] */
|
||||
s->dsp &= 0x00ffffff;
|
||||
s->dsp |= val << 24;
|
||||
+ /*
|
||||
+ * FIXME: if s->waiting != LSI_NOWAIT, this will only execute one
|
||||
+ * instruction. Is this correct?
|
||||
+ */
|
||||
if ((s->dmode & LSI_DMODE_MAN) == 0
|
||||
&& (s->istat1 & LSI_ISTAT1_SRUN) == 0)
|
||||
lsi_execute_script(s);
|
||||
@@ -1987,6 +1996,10 @@ static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val)
|
||||
break;
|
||||
case 0x3b: /* DCNTL */
|
||||
s->dcntl = val & ~(LSI_DCNTL_PFF | LSI_DCNTL_STD);
|
||||
+ /*
|
||||
+ * FIXME: if s->waiting != LSI_NOWAIT, this will only execute one
|
||||
+ * instruction. Is this correct?
|
||||
+ */
|
||||
if ((val & LSI_DCNTL_STD) && (s->istat1 & LSI_ISTAT1_SRUN) == 0)
|
||||
lsi_execute_script(s);
|
||||
break;
|
||||
--
|
||||
2.23.0
|
||||
41
target-arm-Allow-reading-flags-from-FPSCR-for-M-prof.patch
Normal file
41
target-arm-Allow-reading-flags-from-FPSCR-for-M-prof.patch
Normal file
@ -0,0 +1,41 @@
|
||||
From cdc6896659b85f7ed8f7552850312e55170de0c5 Mon Sep 17 00:00:00 2001
|
||||
From: Christophe Lyon <christophe.lyon@linaro.org>
|
||||
Date: Fri, 25 Oct 2019 11:57:11 +0200
|
||||
Subject: [PATCH] target/arm: Allow reading flags from FPSCR for M-profile
|
||||
|
||||
rt==15 is a special case when reading the flags: it means the
|
||||
destination is APSR. This patch avoids rejecting
|
||||
vmrs apsr_nzcv, fpscr
|
||||
as illegal instruction.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Christophe Lyon <christophe.lyon@linaro.org>
|
||||
Message-id: 20191025095711.10853-1-christophe.lyon@linaro.org
|
||||
[PMM: updated the comment]
|
||||
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
(cherry picked from commit 2529ab43b8a05534494704e803e0332d111d8b91)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
target/arm/translate-vfp.inc.c | 5 +++--
|
||||
1 file changed, 3 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
|
||||
index ef45cecbea..75406fd9db 100644
|
||||
--- a/target/arm/translate-vfp.inc.c
|
||||
+++ b/target/arm/translate-vfp.inc.c
|
||||
@@ -704,9 +704,10 @@ static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
|
||||
if (arm_dc_feature(s, ARM_FEATURE_M)) {
|
||||
/*
|
||||
* The only M-profile VFP vmrs/vmsr sysreg is FPSCR.
|
||||
- * Writes to R15 are UNPREDICTABLE; we choose to undef.
|
||||
+ * Accesses to R15 are UNPREDICTABLE; we choose to undef.
|
||||
+ * (FPSCR -> r15 is a special case which writes to the PSR flags.)
|
||||
*/
|
||||
- if (a->rt == 15 || a->reg != ARM_VFP_FPSCR) {
|
||||
+ if (a->rt == 15 && (!a->l || a->reg != ARM_VFP_FPSCR)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
--
|
||||
2.23.0
|
||||
103
target-arm-Don-t-abort-on-M-profile-exception-return.patch
Normal file
103
target-arm-Don-t-abort-on-M-profile-exception-return.patch
Normal file
@ -0,0 +1,103 @@
|
||||
From 9027d3fba605d8f6093342ebe4a1da450d374630 Mon Sep 17 00:00:00 2001
|
||||
From: Peter Maydell <peter.maydell@linaro.org>
|
||||
Date: Thu, 22 Aug 2019 14:15:34 +0100
|
||||
Subject: [PATCH] target/arm: Don't abort on M-profile exception return in
|
||||
linux-user mode
|
||||
|
||||
An attempt to do an exception-return (branch to one of the magic
|
||||
addresses) in linux-user mode for M-profile should behave like
|
||||
a normal branch, because linux-user mode is always going to be
|
||||
in 'handler' mode. This used to work, but we broke it when we added
|
||||
support for the M-profile security extension in commit d02a8698d7ae2bfed.
|
||||
|
||||
In that commit we allowed even handler-mode calls to magic return
|
||||
values to be checked for and dealt with by causing an
|
||||
EXCP_EXCEPTION_EXIT exception to be taken, because this is
|
||||
needed for the FNC_RETURN return-from-non-secure-function-call
|
||||
handling. For system mode we added a check in do_v7m_exception_exit()
|
||||
to make any spurious calls from Handler mode behave correctly, but
|
||||
forgot that linux-user mode would also be affected.
|
||||
|
||||
How an attempted return-from-non-secure-function-call in linux-user
|
||||
mode should be handled is not clear -- on real hardware it would
|
||||
result in return to secure code (not to the Linux kernel) which
|
||||
could then handle the error in any way it chose. For QEMU we take
|
||||
the simple approach of treating this erroneous return the same way
|
||||
it would be handled on a CPU without the security extensions --
|
||||
treat it as a normal branch.
|
||||
|
||||
The upshot of all this is that for linux-user mode we should never
|
||||
do any of the bx_excret magic, so the code change is simple.
|
||||
|
||||
This ought to be a weird corner case that only affects broken guest
|
||||
code (because Linux user processes should never be attempting to do
|
||||
exception returns or NS function returns), except that the code that
|
||||
assigns addresses in RAM for the process and stack in our linux-user
|
||||
code does not attempt to avoid this magic address range, so
|
||||
legitimate code attempting to return to a trampoline routine on the
|
||||
stack can fall into this case. This change fixes those programs,
|
||||
but we should also look at restricting the range of memory we
|
||||
use for M-profile linux-user guests to the area that would be
|
||||
real RAM in hardware.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Reported-by: Christophe Lyon <christophe.lyon@linaro.org>
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
Message-id: 20190822131534.16602-1-peter.maydell@linaro.org
|
||||
Fixes: https://bugs.launchpad.net/qemu/+bug/1840922
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
(cherry picked from commit 5e5584c89f36b302c666bc6db535fd3f7ff35ad2)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
target/arm/translate.c | 21 ++++++++++++++++++++-
|
||||
1 file changed, 20 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/target/arm/translate.c b/target/arm/translate.c
|
||||
index 7853462b21..24cb4ba075 100644
|
||||
--- a/target/arm/translate.c
|
||||
+++ b/target/arm/translate.c
|
||||
@@ -952,10 +952,27 @@ static inline void gen_bx(DisasContext *s, TCGv_i32 var)
|
||||
store_cpu_field(var, thumb);
|
||||
}
|
||||
|
||||
-/* Set PC and Thumb state from var. var is marked as dead.
|
||||
+/*
|
||||
+ * Set PC and Thumb state from var. var is marked as dead.
|
||||
* For M-profile CPUs, include logic to detect exception-return
|
||||
* branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
|
||||
* and BX reg, and no others, and happens only for code in Handler mode.
|
||||
+ * The Security Extension also requires us to check for the FNC_RETURN
|
||||
+ * which signals a function return from non-secure state; this can happen
|
||||
+ * in both Handler and Thread mode.
|
||||
+ * To avoid having to do multiple comparisons in inline generated code,
|
||||
+ * we make the check we do here loose, so it will match for EXC_RETURN
|
||||
+ * in Thread mode. For system emulation do_v7m_exception_exit() checks
|
||||
+ * for these spurious cases and returns without doing anything (giving
|
||||
+ * the same behaviour as for a branch to a non-magic address).
|
||||
+ *
|
||||
+ * In linux-user mode it is unclear what the right behaviour for an
|
||||
+ * attempted FNC_RETURN should be, because in real hardware this will go
|
||||
+ * directly to Secure code (ie not the Linux kernel) which will then treat
|
||||
+ * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
|
||||
+ * attempt behave the way it would on a CPU without the security extension,
|
||||
+ * which is to say "like a normal branch". That means we can simply treat
|
||||
+ * all branches as normal with no magic address behaviour.
|
||||
*/
|
||||
static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
|
||||
{
|
||||
@@ -963,10 +980,12 @@ static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
|
||||
* s->base.is_jmp that we need to do the rest of the work later.
|
||||
*/
|
||||
gen_bx(s, var);
|
||||
+#ifndef CONFIG_USER_ONLY
|
||||
if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
|
||||
(s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
|
||||
s->base.is_jmp = DISAS_BX_EXCRET;
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
static inline void gen_bx_excret_final_code(DisasContext *s)
|
||||
--
|
||||
2.23.0
|
||||
40
target-arm-Free-TCG-temps-in-trans_VMOV_64_sp.patch
Normal file
40
target-arm-Free-TCG-temps-in-trans_VMOV_64_sp.patch
Normal file
@ -0,0 +1,40 @@
|
||||
From 38fb634853ac6547326d9f88b9a068d9fc6b4ad4 Mon Sep 17 00:00:00 2001
|
||||
From: Peter Maydell <peter.maydell@linaro.org>
|
||||
Date: Tue, 27 Aug 2019 13:19:31 +0100
|
||||
Subject: [PATCH] target/arm: Free TCG temps in trans_VMOV_64_sp()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
The function neon_store_reg32() doesn't free the TCG temp that it
|
||||
is passed, so the caller must do that. We got this right in most
|
||||
places but forgot to free the TCG temps in trans_VMOV_64_sp().
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
|
||||
Message-id: 20190827121931.26836-1-peter.maydell@linaro.org
|
||||
(cherry picked from commit 342d27581bd3ecdb995e4fc55fcd383cf3242888)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
target/arm/translate-vfp.inc.c | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
|
||||
index 092eb5ec53..ef45cecbea 100644
|
||||
--- a/target/arm/translate-vfp.inc.c
|
||||
+++ b/target/arm/translate-vfp.inc.c
|
||||
@@ -881,8 +881,10 @@ static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
|
||||
/* gpreg to fpreg */
|
||||
tmp = load_reg(s, a->rt);
|
||||
neon_store_reg32(tmp, a->vm);
|
||||
+ tcg_temp_free_i32(tmp);
|
||||
tmp = load_reg(s, a->rt2);
|
||||
neon_store_reg32(tmp, a->vm + 1);
|
||||
+ tcg_temp_free_i32(tmp);
|
||||
}
|
||||
|
||||
return true;
|
||||
--
|
||||
2.23.0
|
||||
62
ui-Fix-hanging-up-Cocoa-display-on-macOS-10.15-Catal.patch
Normal file
62
ui-Fix-hanging-up-Cocoa-display-on-macOS-10.15-Catal.patch
Normal file
@ -0,0 +1,62 @@
|
||||
From 6705b9344f8d6f134f612c2e35e87cdda5aa6284 Mon Sep 17 00:00:00 2001
|
||||
From: Hikaru Nishida <hikarupsp@gmail.com>
|
||||
Date: Tue, 15 Oct 2019 10:07:34 +0900
|
||||
Subject: [PATCH] ui: Fix hanging up Cocoa display on macOS 10.15 (Catalina)
|
||||
|
||||
macOS API documentation says that before applicationDidFinishLaunching
|
||||
is called, no events will be processed. However, some events are
|
||||
fired before it is called in macOS Catalina. This causes deadlock of
|
||||
iothread_lock in handleEvent, because the lock is released only after the
|
||||
app_started_sem is posted.
|
||||
This patch avoids processing events before the app_started_sem is
|
||||
posted to prevent this deadlock.
|
||||
|
||||
Buglink: https://bugs.launchpad.net/qemu/+bug/1847906
|
||||
Signed-off-by: Hikaru Nishida <hikarupsp@gmail.com>
|
||||
Message-id: 20191015010734.85229-1-hikarupsp@gmail.com
|
||||
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
|
||||
(cherry picked from commit dff742ad27efa474ec04accdbf422c9acfd3e30e)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
ui/cocoa.m | 12 ++++++++++++
|
||||
1 file changed, 12 insertions(+)
|
||||
|
||||
diff --git a/ui/cocoa.m b/ui/cocoa.m
|
||||
index c2984028c5..3026ead621 100644
|
||||
--- a/ui/cocoa.m
|
||||
+++ b/ui/cocoa.m
|
||||
@@ -132,6 +132,7 @@ NSArray * supportedImageFileTypes;
|
||||
|
||||
static QemuSemaphore display_init_sem;
|
||||
static QemuSemaphore app_started_sem;
|
||||
+static bool allow_events;
|
||||
|
||||
// Utility functions to run specified code block with iothread lock held
|
||||
typedef void (^CodeBlock)(void);
|
||||
@@ -727,6 +728,16 @@ QemuCocoaView *cocoaView;
|
||||
|
||||
- (bool) handleEvent:(NSEvent *)event
|
||||
{
|
||||
+ if(!allow_events) {
|
||||
+ /*
|
||||
+ * Just let OSX have all events that arrive before
|
||||
+ * applicationDidFinishLaunching.
|
||||
+ * This avoids a deadlock on the iothread lock, which cocoa_display_init()
|
||||
+ * will not drop until after the app_started_sem is posted. (In theory
|
||||
+ * there should not be any such events, but OSX Catalina now emits some.)
|
||||
+ */
|
||||
+ return false;
|
||||
+ }
|
||||
return bool_with_iothread_lock(^{
|
||||
return [self handleEventLocked:event];
|
||||
});
|
||||
@@ -1154,6 +1165,7 @@ QemuCocoaView *cocoaView;
|
||||
- (void)applicationDidFinishLaunching: (NSNotification *) note
|
||||
{
|
||||
COCOA_DEBUG("QemuCocoaAppController: applicationDidFinishLaunching\n");
|
||||
+ allow_events = true;
|
||||
/* Tell cocoa_display_init to proceed */
|
||||
qemu_sem_post(&app_started_sem);
|
||||
}
|
||||
--
|
||||
2.23.0
|
||||
77
util-hbitmap-strict-hbitmap_reset.patch
Normal file
77
util-hbitmap-strict-hbitmap_reset.patch
Normal file
@ -0,0 +1,77 @@
|
||||
From fcd7cba6acb7344aca70f5f8ec16626e817b35a5 Mon Sep 17 00:00:00 2001
|
||||
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
|
||||
Date: Tue, 6 Aug 2019 18:26:11 +0300
|
||||
Subject: [PATCH] util/hbitmap: strict hbitmap_reset
|
||||
|
||||
hbitmap_reset has an unobvious property: it rounds requested region up.
|
||||
It may provoke bugs, like in recently fixed write-blocking mode of
|
||||
mirror: user calls reset on unaligned region, not keeping in mind that
|
||||
there are possible unrelated dirty bytes, covered by rounded-up region
|
||||
and information of this unrelated "dirtiness" will be lost.
|
||||
|
||||
Make hbitmap_reset strict: assert that arguments are aligned, allowing
|
||||
only one exception when @start + @count == hb->orig_size. It's needed
|
||||
to comfort users of hbitmap_next_dirty_area, which cares about
|
||||
hb->orig_size.
|
||||
|
||||
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
|
||||
Reviewed-by: Max Reitz <mreitz@redhat.com>
|
||||
Message-Id: <20190806152611.280389-1-vsementsov@virtuozzo.com>
|
||||
[Maintainer edit: Max's suggestions from on-list. --js]
|
||||
[Maintainer edit: Eric's suggestion for aligned macro. --js]
|
||||
Signed-off-by: John Snow <jsnow@redhat.com>
|
||||
(cherry picked from commit 48557b138383aaf69c2617ca9a88bfb394fc50ec)
|
||||
*prereq for fed33bd175f663cc8c13f8a490a4f35a19756cfe
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
include/qemu/hbitmap.h | 5 +++++
|
||||
tests/test-hbitmap.c | 2 +-
|
||||
util/hbitmap.c | 4 ++++
|
||||
3 files changed, 10 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
|
||||
index 4afbe6292e..1bf944ca3d 100644
|
||||
--- a/include/qemu/hbitmap.h
|
||||
+++ b/include/qemu/hbitmap.h
|
||||
@@ -132,6 +132,11 @@ void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count);
|
||||
* @count: Number of bits to reset.
|
||||
*
|
||||
* Reset a consecutive range of bits in an HBitmap.
|
||||
+ * @start and @count must be aligned to bitmap granularity. The only exception
|
||||
+ * is resetting the tail of the bitmap: @count may be equal to hb->orig_size -
|
||||
+ * @start, in this case @count may be not aligned. The sum of @start + @count is
|
||||
+ * allowed to be greater than hb->orig_size, but only if @start < hb->orig_size
|
||||
+ * and @start + @count = ALIGN_UP(hb->orig_size, granularity).
|
||||
*/
|
||||
void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count);
|
||||
|
||||
diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c
|
||||
index 592d8219db..2be56d1597 100644
|
||||
--- a/tests/test-hbitmap.c
|
||||
+++ b/tests/test-hbitmap.c
|
||||
@@ -423,7 +423,7 @@ static void test_hbitmap_granularity(TestHBitmapData *data,
|
||||
hbitmap_test_check(data, 0);
|
||||
hbitmap_test_set(data, 0, 3);
|
||||
g_assert_cmpint(hbitmap_count(data->hb), ==, 4);
|
||||
- hbitmap_test_reset(data, 0, 1);
|
||||
+ hbitmap_test_reset(data, 0, 2);
|
||||
g_assert_cmpint(hbitmap_count(data->hb), ==, 2);
|
||||
}
|
||||
|
||||
diff --git a/util/hbitmap.c b/util/hbitmap.c
|
||||
index bcc0acdc6a..71c6ba2c52 100644
|
||||
--- a/util/hbitmap.c
|
||||
+++ b/util/hbitmap.c
|
||||
@@ -476,6 +476,10 @@ void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count)
|
||||
/* Compute range in the last layer. */
|
||||
uint64_t first;
|
||||
uint64_t last = start + count - 1;
|
||||
+ uint64_t gran = 1ULL << hb->granularity;
|
||||
+
|
||||
+ assert(QEMU_IS_ALIGNED(start, gran));
|
||||
+ assert(QEMU_IS_ALIGNED(count, gran) || (start + count == hb->orig_size));
|
||||
|
||||
trace_hbitmap_reset(hb, start, count,
|
||||
start >> hb->granularity, last >> hb->granularity);
|
||||
--
|
||||
2.23.0
|
||||
102
util-iov-improve-qemu_iovec_is_zero.patch
Normal file
102
util-iov-improve-qemu_iovec_is_zero.patch
Normal file
@ -0,0 +1,102 @@
|
||||
From b3b76fc643912d2c86b13caff30a1151f2958702 Mon Sep 17 00:00:00 2001
|
||||
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
|
||||
Date: Tue, 4 Jun 2019 19:15:04 +0300
|
||||
Subject: [PATCH] util/iov: improve qemu_iovec_is_zero
|
||||
|
||||
We'll need to check a part of qiov soon, so implement it now.
|
||||
|
||||
Optimization with align down to 4 * sizeof(long) is dropped due to:
|
||||
1. It is strange: it aligns length of the buffer, but where is a
|
||||
guarantee that buffer pointer is aligned itself?
|
||||
2. buffer_is_zero() is a better place for optimizations and it has
|
||||
them.
|
||||
|
||||
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
|
||||
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-id: 20190604161514.262241-3-vsementsov@virtuozzo.com
|
||||
Message-Id: <20190604161514.262241-3-vsementsov@virtuozzo.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit f76889e7b947d896db51be8a4d9c941c2f70365a)
|
||||
*prereq for 292d06b9
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
block/io.c | 2 +-
|
||||
include/qemu/iov.h | 2 +-
|
||||
util/iov.c | 31 +++++++++++++++++++------------
|
||||
3 files changed, 21 insertions(+), 14 deletions(-)
|
||||
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index 06305c6ea6..dccf687acc 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -1715,7 +1715,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
|
||||
|
||||
if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
|
||||
!(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes &&
|
||||
- qemu_iovec_is_zero(qiov)) {
|
||||
+ qemu_iovec_is_zero(qiov, 0, qiov->size)) {
|
||||
flags |= BDRV_REQ_ZERO_WRITE;
|
||||
if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
|
||||
flags |= BDRV_REQ_MAY_UNMAP;
|
||||
diff --git a/include/qemu/iov.h b/include/qemu/iov.h
|
||||
index f3787a0cf7..29957c8a72 100644
|
||||
--- a/include/qemu/iov.h
|
||||
+++ b/include/qemu/iov.h
|
||||
@@ -212,7 +212,7 @@ void qemu_iovec_concat(QEMUIOVector *dst,
|
||||
size_t qemu_iovec_concat_iov(QEMUIOVector *dst,
|
||||
struct iovec *src_iov, unsigned int src_cnt,
|
||||
size_t soffset, size_t sbytes);
|
||||
-bool qemu_iovec_is_zero(QEMUIOVector *qiov);
|
||||
+bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t qiov_offeset, size_t bytes);
|
||||
void qemu_iovec_destroy(QEMUIOVector *qiov);
|
||||
void qemu_iovec_reset(QEMUIOVector *qiov);
|
||||
size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
|
||||
diff --git a/util/iov.c b/util/iov.c
|
||||
index 366ff9cdd1..9ac0261853 100644
|
||||
--- a/util/iov.c
|
||||
+++ b/util/iov.c
|
||||
@@ -451,23 +451,30 @@ void qemu_iovec_init_extended(
|
||||
}
|
||||
|
||||
/*
|
||||
- * Check if the contents of the iovecs are all zero
|
||||
+ * Check if the contents of subrange of qiov data is all zeroes.
|
||||
*/
|
||||
-bool qemu_iovec_is_zero(QEMUIOVector *qiov)
|
||||
+bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t offset, size_t bytes)
|
||||
{
|
||||
- int i;
|
||||
- for (i = 0; i < qiov->niov; i++) {
|
||||
- size_t offs = QEMU_ALIGN_DOWN(qiov->iov[i].iov_len, 4 * sizeof(long));
|
||||
- uint8_t *ptr = qiov->iov[i].iov_base;
|
||||
- if (offs && !buffer_is_zero(qiov->iov[i].iov_base, offs)) {
|
||||
+ struct iovec *iov;
|
||||
+ size_t current_offset;
|
||||
+
|
||||
+ assert(offset + bytes <= qiov->size);
|
||||
+
|
||||
+ iov = iov_skip_offset(qiov->iov, offset, &current_offset);
|
||||
+
|
||||
+ while (bytes) {
|
||||
+ uint8_t *base = (uint8_t *)iov->iov_base + current_offset;
|
||||
+ size_t len = MIN(iov->iov_len - current_offset, bytes);
|
||||
+
|
||||
+ if (!buffer_is_zero(base, len)) {
|
||||
return false;
|
||||
}
|
||||
- for (; offs < qiov->iov[i].iov_len; offs++) {
|
||||
- if (ptr[offs]) {
|
||||
- return false;
|
||||
- }
|
||||
- }
|
||||
+
|
||||
+ current_offset = 0;
|
||||
+ bytes -= len;
|
||||
+ iov++;
|
||||
}
|
||||
+
|
||||
return true;
|
||||
}
|
||||
|
||||
--
|
||||
2.23.0
|
||||
177
util-iov-introduce-qemu_iovec_init_extended.patch
Normal file
177
util-iov-introduce-qemu_iovec_init_extended.patch
Normal file
@ -0,0 +1,177 @@
|
||||
From cff024fe856ab36db3056ba4cb1d7cfa4c39795d Mon Sep 17 00:00:00 2001
|
||||
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
|
||||
Date: Tue, 4 Jun 2019 19:15:03 +0300
|
||||
Subject: [PATCH] util/iov: introduce qemu_iovec_init_extended
|
||||
|
||||
Introduce new initialization API, to create requests with padding. Will
|
||||
be used in the following patch. New API uses qemu_iovec_init_buf if
|
||||
resulting io vector has only one element, to avoid extra allocations.
|
||||
So, we need to update qemu_iovec_destroy to support destroying such
|
||||
QIOVs.
|
||||
|
||||
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
|
||||
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-id: 20190604161514.262241-2-vsementsov@virtuozzo.com
|
||||
Message-Id: <20190604161514.262241-2-vsementsov@virtuozzo.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit d953169d4840f312d3b9a54952f4a7ccfcb3b311)
|
||||
*prereq for 292d06b9
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
include/qemu/iov.h | 7 +++
|
||||
util/iov.c | 112 +++++++++++++++++++++++++++++++++++++++++++--
|
||||
2 files changed, 114 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/include/qemu/iov.h b/include/qemu/iov.h
|
||||
index 48b45987b7..f3787a0cf7 100644
|
||||
--- a/include/qemu/iov.h
|
||||
+++ b/include/qemu/iov.h
|
||||
@@ -199,6 +199,13 @@ static inline void *qemu_iovec_buf(QEMUIOVector *qiov)
|
||||
|
||||
void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint);
|
||||
void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov);
|
||||
+void qemu_iovec_init_extended(
|
||||
+ QEMUIOVector *qiov,
|
||||
+ void *head_buf, size_t head_len,
|
||||
+ QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len,
|
||||
+ void *tail_buf, size_t tail_len);
|
||||
+void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
|
||||
+ size_t offset, size_t len);
|
||||
void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len);
|
||||
void qemu_iovec_concat(QEMUIOVector *dst,
|
||||
QEMUIOVector *src, size_t soffset, size_t sbytes);
|
||||
diff --git a/util/iov.c b/util/iov.c
|
||||
index 74e6ca8ed7..366ff9cdd1 100644
|
||||
--- a/util/iov.c
|
||||
+++ b/util/iov.c
|
||||
@@ -353,6 +353,103 @@ void qemu_iovec_concat(QEMUIOVector *dst,
|
||||
qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes);
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * qiov_find_iov
|
||||
+ *
|
||||
+ * Return pointer to iovec structure, where byte at @offset in original vector
|
||||
+ * @iov exactly is.
|
||||
+ * Set @remaining_offset to be offset inside that iovec to the same byte.
|
||||
+ */
|
||||
+static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset,
|
||||
+ size_t *remaining_offset)
|
||||
+{
|
||||
+ while (offset > 0 && offset >= iov->iov_len) {
|
||||
+ offset -= iov->iov_len;
|
||||
+ iov++;
|
||||
+ }
|
||||
+ *remaining_offset = offset;
|
||||
+
|
||||
+ return iov;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * qiov_slice
|
||||
+ *
|
||||
+ * Find subarray of iovec's, containing requested range. @head would
|
||||
+ * be offset in first iov (returned by the function), @tail would be
|
||||
+ * count of extra bytes in last iovec (returned iov + @niov - 1).
|
||||
+ */
|
||||
+static struct iovec *qiov_slice(QEMUIOVector *qiov,
|
||||
+ size_t offset, size_t len,
|
||||
+ size_t *head, size_t *tail, int *niov)
|
||||
+{
|
||||
+ struct iovec *iov, *end_iov;
|
||||
+
|
||||
+ assert(offset + len <= qiov->size);
|
||||
+
|
||||
+ iov = iov_skip_offset(qiov->iov, offset, head);
|
||||
+ end_iov = iov_skip_offset(iov, *head + len, tail);
|
||||
+
|
||||
+ if (*tail > 0) {
|
||||
+ assert(*tail < end_iov->iov_len);
|
||||
+ *tail = end_iov->iov_len - *tail;
|
||||
+ end_iov++;
|
||||
+ }
|
||||
+
|
||||
+ *niov = end_iov - iov;
|
||||
+
|
||||
+ return iov;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov,
|
||||
+ * and @tail_buf buffer into new qiov.
|
||||
+ */
|
||||
+void qemu_iovec_init_extended(
|
||||
+ QEMUIOVector *qiov,
|
||||
+ void *head_buf, size_t head_len,
|
||||
+ QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len,
|
||||
+ void *tail_buf, size_t tail_len)
|
||||
+{
|
||||
+ size_t mid_head, mid_tail;
|
||||
+ int total_niov, mid_niov = 0;
|
||||
+ struct iovec *p, *mid_iov;
|
||||
+
|
||||
+ if (mid_len) {
|
||||
+ mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len,
|
||||
+ &mid_head, &mid_tail, &mid_niov);
|
||||
+ }
|
||||
+
|
||||
+ total_niov = !!head_len + mid_niov + !!tail_len;
|
||||
+ if (total_niov == 1) {
|
||||
+ qemu_iovec_init_buf(qiov, NULL, 0);
|
||||
+ p = &qiov->local_iov;
|
||||
+ } else {
|
||||
+ qiov->niov = qiov->nalloc = total_niov;
|
||||
+ qiov->size = head_len + mid_len + tail_len;
|
||||
+ p = qiov->iov = g_new(struct iovec, qiov->niov);
|
||||
+ }
|
||||
+
|
||||
+ if (head_len) {
|
||||
+ p->iov_base = head_buf;
|
||||
+ p->iov_len = head_len;
|
||||
+ p++;
|
||||
+ }
|
||||
+
|
||||
+ if (mid_len) {
|
||||
+ memcpy(p, mid_iov, mid_niov * sizeof(*p));
|
||||
+ p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head;
|
||||
+ p[0].iov_len -= mid_head;
|
||||
+ p[mid_niov - 1].iov_len -= mid_tail;
|
||||
+ p += mid_niov;
|
||||
+ }
|
||||
+
|
||||
+ if (tail_len) {
|
||||
+ p->iov_base = tail_buf;
|
||||
+ p->iov_len = tail_len;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Check if the contents of the iovecs are all zero
|
||||
*/
|
||||
@@ -374,14 +471,19 @@ bool qemu_iovec_is_zero(QEMUIOVector *qiov)
|
||||
return true;
|
||||
}
|
||||
|
||||
+void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
|
||||
+ size_t offset, size_t len)
|
||||
+{
|
||||
+ qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 0);
|
||||
+}
|
||||
+
|
||||
void qemu_iovec_destroy(QEMUIOVector *qiov)
|
||||
{
|
||||
- assert(qiov->nalloc != -1);
|
||||
+ if (qiov->nalloc != -1) {
|
||||
+ g_free(qiov->iov);
|
||||
+ }
|
||||
|
||||
- qemu_iovec_reset(qiov);
|
||||
- g_free(qiov->iov);
|
||||
- qiov->nalloc = 0;
|
||||
- qiov->iov = NULL;
|
||||
+ memset(qiov, 0, sizeof(*qiov));
|
||||
}
|
||||
|
||||
void qemu_iovec_reset(QEMUIOVector *qiov)
|
||||
--
|
||||
2.23.0
|
||||
42
vhost-user-save-features-if-the-char-dev-is-closed.patch
Normal file
42
vhost-user-save-features-if-the-char-dev-is-closed.patch
Normal file
@ -0,0 +1,42 @@
|
||||
From 7b404cae7fa2850d476c29258f03b8e77a5b4bd0 Mon Sep 17 00:00:00 2001
|
||||
From: Adrian Moreno <amorenoz@redhat.com>
|
||||
Date: Tue, 24 Sep 2019 18:20:44 +0200
|
||||
Subject: [PATCH] vhost-user: save features if the char dev is closed
|
||||
|
||||
That way the state can be correctly restored when the device is opened
|
||||
again. This might happen if the backend is restarted.
|
||||
|
||||
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1738768
|
||||
Reported-by: Pei Zhang <pezhang@redhat.com>
|
||||
Fixes: 6ab79a20af3a ("do not call vhost_net_cleanup() on running net from char user event")
|
||||
Cc: ddstreet@canonical.com
|
||||
Cc: Michael S. Tsirkin <mst@redhat.com>
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Adrian Moreno <amorenoz@redhat.com>
|
||||
Message-Id: <20190924162044.11414-1-amorenoz@redhat.com>
|
||||
Acked-by: Jason Wang <jasowang@redhat.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
(cherry picked from commit c6beefd674fff8d41b90365dfccad32e53a5abcb)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
net/vhost-user.c | 4 ++++
|
||||
1 file changed, 4 insertions(+)
|
||||
|
||||
diff --git a/net/vhost-user.c b/net/vhost-user.c
|
||||
index 51921de443..014199d600 100644
|
||||
--- a/net/vhost-user.c
|
||||
+++ b/net/vhost-user.c
|
||||
@@ -235,6 +235,10 @@ static void chr_closed_bh(void *opaque)
|
||||
|
||||
s = DO_UPCAST(NetVhostUserState, nc, ncs[0]);
|
||||
|
||||
+ if (s->vhost_net) {
|
||||
+ s->acked_features = vhost_net_get_acked_features(s->vhost_net);
|
||||
+ }
|
||||
+
|
||||
qmp_set_link(name, false, &err);
|
||||
|
||||
qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, net_vhost_user_event,
|
||||
--
|
||||
2.23.0
|
||||
80
virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch
Normal file
80
virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch
Normal file
@ -0,0 +1,80 @@
|
||||
From 01be50603be4f17af4318a7a3fe58dcc6dab1b31 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <philmd@redhat.com>
|
||||
Date: Fri, 16 Aug 2019 19:15:03 +0200
|
||||
Subject: [PATCH] virtio-blk: Cancel the pending BH when the dataplane is reset
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
When 'system_reset' is called, the main loop clears the memory
|
||||
region cache before the BH has a chance to execute. Later when
|
||||
the deferred function is called, some assumptions that were
|
||||
made when scheduling them are no longer true when they actually
|
||||
execute.
|
||||
|
||||
This is what happens using a virtio-blk device (fresh RHEL7.8 install):
|
||||
|
||||
$ (sleep 12.3; echo system_reset; sleep 12.3; echo system_reset; sleep 1; echo q) \
|
||||
| qemu-system-x86_64 -m 4G -smp 8 -boot menu=on \
|
||||
-device virtio-blk-pci,id=image1,drive=drive_image1 \
|
||||
-drive file=/var/lib/libvirt/images/rhel78.qcow2,if=none,id=drive_image1,format=qcow2,cache=none \
|
||||
-device virtio-net-pci,netdev=net0,id=nic0,mac=52:54:00:c4:e7:84 \
|
||||
-netdev tap,id=net0,script=/bin/true,downscript=/bin/true,vhost=on \
|
||||
-monitor stdio -serial null -nographic
|
||||
(qemu) system_reset
|
||||
(qemu) system_reset
|
||||
(qemu) qemu-system-x86_64: hw/virtio/virtio.c:225: vring_get_region_caches: Assertion `caches != NULL' failed.
|
||||
Aborted
|
||||
|
||||
(gdb) bt
|
||||
Thread 1 (Thread 0x7f109c17b680 (LWP 10939)):
|
||||
#0 0x00005604083296d1 in vring_get_region_caches (vq=0x56040a24bdd0) at hw/virtio/virtio.c:227
|
||||
#1 0x000056040832972b in vring_avail_flags (vq=0x56040a24bdd0) at hw/virtio/virtio.c:235
|
||||
#2 0x000056040832d13d in virtio_should_notify (vdev=0x56040a240630, vq=0x56040a24bdd0) at hw/virtio/virtio.c:1648
|
||||
#3 0x000056040832d1f8 in virtio_notify_irqfd (vdev=0x56040a240630, vq=0x56040a24bdd0) at hw/virtio/virtio.c:1662
|
||||
#4 0x00005604082d213d in notify_guest_bh (opaque=0x56040a243ec0) at hw/block/dataplane/virtio-blk.c:75
|
||||
#5 0x000056040883dc35 in aio_bh_call (bh=0x56040a243f10) at util/async.c:90
|
||||
#6 0x000056040883dccd in aio_bh_poll (ctx=0x560409161980) at util/async.c:118
|
||||
#7 0x0000560408842af7 in aio_dispatch (ctx=0x560409161980) at util/aio-posix.c:460
|
||||
#8 0x000056040883e068 in aio_ctx_dispatch (source=0x560409161980, callback=0x0, user_data=0x0) at util/async.c:261
|
||||
#9 0x00007f10a8fca06d in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
|
||||
#10 0x0000560408841445 in glib_pollfds_poll () at util/main-loop.c:215
|
||||
#11 0x00005604088414bf in os_host_main_loop_wait (timeout=0) at util/main-loop.c:238
|
||||
#12 0x00005604088415c4 in main_loop_wait (nonblocking=0) at util/main-loop.c:514
|
||||
#13 0x0000560408416b1e in main_loop () at vl.c:1923
|
||||
#14 0x000056040841e0e8 in main (argc=20, argv=0x7ffc2c3f9c58, envp=0x7ffc2c3f9d00) at vl.c:4578
|
||||
|
||||
Fix this by cancelling the BH when the virtio dataplane is stopped.
|
||||
|
||||
[This version of the patch was modified as discussed with Philippe on
|
||||
the mailing list thread.
|
||||
--Stefan]
|
||||
|
||||
Reported-by: Yihuang Yu <yihyu@redhat.com>
|
||||
Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Fixes: https://bugs.launchpad.net/qemu/+bug/1839428
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
|
||||
Message-Id: <20190816171503.24761-1-philmd@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
(cherry picked from commit ebb6ff25cd888a52a64a9adc3692541c6d1d9a42)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
hw/block/dataplane/virtio-blk.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
|
||||
index 158c78f852..5fea76df85 100644
|
||||
--- a/hw/block/dataplane/virtio-blk.c
|
||||
+++ b/hw/block/dataplane/virtio-blk.c
|
||||
@@ -297,6 +297,9 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
|
||||
virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i);
|
||||
}
|
||||
|
||||
+ qemu_bh_cancel(s->bh);
|
||||
+ notify_guest_bh(s); /* final chance to notify guest */
|
||||
+
|
||||
/* Clean up guest notifier (irq) */
|
||||
k->set_guest_notifiers(qbus->parent, nvqs, false);
|
||||
|
||||
--
|
||||
2.23.0
|
||||
122
virtio-net-prevent-offloads-reset-on-migration.patch
Normal file
122
virtio-net-prevent-offloads-reset-on-migration.patch
Normal file
@ -0,0 +1,122 @@
|
||||
From 4887acf574a573137660aa98d9d422ece0a41a5a Mon Sep 17 00:00:00 2001
|
||||
From: Mikhail Sennikovsky <mikhail.sennikovskii@cloud.ionos.com>
|
||||
Date: Fri, 11 Oct 2019 15:58:04 +0200
|
||||
Subject: [PATCH] virtio-net: prevent offloads reset on migration
|
||||
|
||||
Currently offloads disabled by guest via the VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET
|
||||
command are not preserved on VM migration.
|
||||
Instead all offloads reported by guest features (via VIRTIO_PCI_GUEST_FEATURES)
|
||||
get enabled.
|
||||
What happens is: first the VirtIONet::curr_guest_offloads gets restored and offloads
|
||||
are getting set correctly:
|
||||
|
||||
#0 qemu_set_offload (nc=0x555556a11400, csum=1, tso4=0, tso6=0, ecn=0, ufo=0) at net/net.c:474
|
||||
#1 virtio_net_apply_guest_offloads (n=0x555557701ca0) at hw/net/virtio-net.c:720
|
||||
#2 virtio_net_post_load_device (opaque=0x555557701ca0, version_id=11) at hw/net/virtio-net.c:2334
|
||||
#3 vmstate_load_state (f=0x5555569dc010, vmsd=0x555556577c80 <vmstate_virtio_net_device>, opaque=0x555557701ca0, version_id=11)
|
||||
at migration/vmstate.c:168
|
||||
#4 virtio_load (vdev=0x555557701ca0, f=0x5555569dc010, version_id=11) at hw/virtio/virtio.c:2197
|
||||
#5 virtio_device_get (f=0x5555569dc010, opaque=0x555557701ca0, size=0, field=0x55555668cd00 <__compound_literal.5>) at hw/virtio/virtio.c:2036
|
||||
#6 vmstate_load_state (f=0x5555569dc010, vmsd=0x555556577ce0 <vmstate_virtio_net>, opaque=0x555557701ca0, version_id=11) at migration/vmstate.c:143
|
||||
#7 vmstate_load (f=0x5555569dc010, se=0x5555578189e0) at migration/savevm.c:829
|
||||
#8 qemu_loadvm_section_start_full (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2211
|
||||
#9 qemu_loadvm_state_main (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2395
|
||||
#10 qemu_loadvm_state (f=0x5555569dc010) at migration/savevm.c:2467
|
||||
#11 process_incoming_migration_co (opaque=0x0) at migration/migration.c:449
|
||||
|
||||
However later on the features are getting restored, and offloads get reset to
|
||||
everything supported by features:
|
||||
|
||||
#0 qemu_set_offload (nc=0x555556a11400, csum=1, tso4=1, tso6=1, ecn=0, ufo=0) at net/net.c:474
|
||||
#1 virtio_net_apply_guest_offloads (n=0x555557701ca0) at hw/net/virtio-net.c:720
|
||||
#2 virtio_net_set_features (vdev=0x555557701ca0, features=5104441767) at hw/net/virtio-net.c:773
|
||||
#3 virtio_set_features_nocheck (vdev=0x555557701ca0, val=5104441767) at hw/virtio/virtio.c:2052
|
||||
#4 virtio_load (vdev=0x555557701ca0, f=0x5555569dc010, version_id=11) at hw/virtio/virtio.c:2220
|
||||
#5 virtio_device_get (f=0x5555569dc010, opaque=0x555557701ca0, size=0, field=0x55555668cd00 <__compound_literal.5>) at hw/virtio/virtio.c:2036
|
||||
#6 vmstate_load_state (f=0x5555569dc010, vmsd=0x555556577ce0 <vmstate_virtio_net>, opaque=0x555557701ca0, version_id=11) at migration/vmstate.c:143
|
||||
#7 vmstate_load (f=0x5555569dc010, se=0x5555578189e0) at migration/savevm.c:829
|
||||
#8 qemu_loadvm_section_start_full (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2211
|
||||
#9 qemu_loadvm_state_main (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2395
|
||||
#10 qemu_loadvm_state (f=0x5555569dc010) at migration/savevm.c:2467
|
||||
#11 process_incoming_migration_co (opaque=0x0) at migration/migration.c:449
|
||||
|
||||
Fix this by preserving the state in saved_guest_offloads field and
|
||||
pushing out offload initialization to the new post load hook.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Mikhail Sennikovsky <mikhail.sennikovskii@cloud.ionos.com>
|
||||
Signed-off-by: Jason Wang <jasowang@redhat.com>
|
||||
(cherry picked from commit 7788c3f2e21e35902d45809b236791383bbb613e)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
hw/net/virtio-net.c | 27 ++++++++++++++++++++++++---
|
||||
include/hw/virtio/virtio-net.h | 2 ++
|
||||
2 files changed, 26 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
|
||||
index b9e1cd71cf..6adb0fe252 100644
|
||||
--- a/hw/net/virtio-net.c
|
||||
+++ b/hw/net/virtio-net.c
|
||||
@@ -2330,9 +2330,13 @@ static int virtio_net_post_load_device(void *opaque, int version_id)
|
||||
n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
|
||||
}
|
||||
|
||||
- if (peer_has_vnet_hdr(n)) {
|
||||
- virtio_net_apply_guest_offloads(n);
|
||||
- }
|
||||
+ /*
|
||||
+ * curr_guest_offloads will be later overwritten by the
|
||||
+ * virtio_set_features_nocheck call done from the virtio_load.
|
||||
+ * Here we make sure it is preserved and restored accordingly
|
||||
+ * in the virtio_net_post_load_virtio callback.
|
||||
+ */
|
||||
+ n->saved_guest_offloads = n->curr_guest_offloads;
|
||||
|
||||
virtio_net_set_queues(n);
|
||||
|
||||
@@ -2367,6 +2371,22 @@ static int virtio_net_post_load_device(void *opaque, int version_id)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int virtio_net_post_load_virtio(VirtIODevice *vdev)
|
||||
+{
|
||||
+ VirtIONet *n = VIRTIO_NET(vdev);
|
||||
+ /*
|
||||
+ * The actual needed state is now in saved_guest_offloads,
|
||||
+ * see virtio_net_post_load_device for detail.
|
||||
+ * Restore it back and apply the desired offloads.
|
||||
+ */
|
||||
+ n->curr_guest_offloads = n->saved_guest_offloads;
|
||||
+ if (peer_has_vnet_hdr(n)) {
|
||||
+ virtio_net_apply_guest_offloads(n);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
/* tx_waiting field of a VirtIONetQueue */
|
||||
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
|
||||
.name = "virtio-net-queue-tx_waiting",
|
||||
@@ -2909,6 +2929,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data)
|
||||
vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
|
||||
vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
|
||||
vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
|
||||
+ vdc->post_load = virtio_net_post_load_virtio;
|
||||
vdc->vmsd = &vmstate_virtio_net_device;
|
||||
}
|
||||
|
||||
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
|
||||
index b96f0c643f..07a9319f4b 100644
|
||||
--- a/include/hw/virtio/virtio-net.h
|
||||
+++ b/include/hw/virtio/virtio-net.h
|
||||
@@ -182,6 +182,8 @@ struct VirtIONet {
|
||||
char *netclient_name;
|
||||
char *netclient_type;
|
||||
uint64_t curr_guest_offloads;
|
||||
+ /* used on saved state restore phase to preserve the curr_guest_offloads */
|
||||
+ uint64_t saved_guest_offloads;
|
||||
AnnounceTimer announce_timer;
|
||||
bool needs_vnet_hdr_swap;
|
||||
bool mtu_bypass_backend;
|
||||
--
|
||||
2.23.0
|
||||
63
virtio-new-post_load-hook.patch
Normal file
63
virtio-new-post_load-hook.patch
Normal file
@ -0,0 +1,63 @@
|
||||
From 8010d3fce008dd13f155bc0babfe236ea44a2712 Mon Sep 17 00:00:00 2001
|
||||
From: "Michael S. Tsirkin" <mst@redhat.com>
|
||||
Date: Fri, 11 Oct 2019 15:58:03 +0200
|
||||
Subject: [PATCH] virtio: new post_load hook
|
||||
|
||||
Post load hook in virtio vmsd is called early while device is processed,
|
||||
and when VirtIODevice core isn't fully initialized. Most device
|
||||
specific code isn't ready to deal with a device in such state, and
|
||||
behaves weirdly.
|
||||
|
||||
Add a new post_load hook in a device class instead. Devices should use
|
||||
this unless they specifically want to verify the migration stream as
|
||||
it's processed, e.g. for bounds checking.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Suggested-by: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||
Cc: Mikhail Sennikovsky <mikhail.sennikovskii@cloud.ionos.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Jason Wang <jasowang@redhat.com>
|
||||
(cherry picked from commit 1dd713837cac8ec5a97d3b8492d72ce5ac94803c)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
hw/virtio/virtio.c | 7 +++++++
|
||||
include/hw/virtio/virtio.h | 6 ++++++
|
||||
2 files changed, 13 insertions(+)
|
||||
|
||||
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
|
||||
index a94ea18a9c..7c3822c3a0 100644
|
||||
--- a/hw/virtio/virtio.c
|
||||
+++ b/hw/virtio/virtio.c
|
||||
@@ -2287,6 +2287,13 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
+ if (vdc->post_load) {
|
||||
+ ret = vdc->post_load(vdev);
|
||||
+ if (ret) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
|
||||
index b189788cb2..f9f62370e9 100644
|
||||
--- a/include/hw/virtio/virtio.h
|
||||
+++ b/include/hw/virtio/virtio.h
|
||||
@@ -158,6 +158,12 @@ typedef struct VirtioDeviceClass {
|
||||
*/
|
||||
void (*save)(VirtIODevice *vdev, QEMUFile *f);
|
||||
int (*load)(VirtIODevice *vdev, QEMUFile *f, int version_id);
|
||||
+ /* Post load hook in vmsd is called early while device is processed, and
|
||||
+ * when VirtIODevice isn't fully initialized. Devices should use this instead,
|
||||
+ * unless they specifically want to verify the migration stream as it's
|
||||
+ * processed, e.g. for bounds checking.
|
||||
+ */
|
||||
+ int (*post_load)(VirtIODevice *vdev);
|
||||
const VMStateDescription *vmsd;
|
||||
} VirtioDeviceClass;
|
||||
|
||||
--
|
||||
2.23.0
|
||||
49
vpc-Return-0-from-vpc_co_create-on-success.patch
Normal file
49
vpc-Return-0-from-vpc_co_create-on-success.patch
Normal file
@ -0,0 +1,49 @@
|
||||
From 97c478c355fee96eb2b740313f50561e69b6f305 Mon Sep 17 00:00:00 2001
|
||||
From: Max Reitz <mreitz@redhat.com>
|
||||
Date: Mon, 2 Sep 2019 21:33:16 +0200
|
||||
Subject: [PATCH] vpc: Return 0 from vpc_co_create() on success
|
||||
|
||||
blockdev_create_run() directly uses .bdrv_co_create()'s return value as
|
||||
the job's return value. Jobs must return 0 on success, not just any
|
||||
nonnegative value. Therefore, using blockdev-create for VPC images may
|
||||
currently fail as the vpc driver may return a positive integer.
|
||||
|
||||
Because there is no point in returning a positive integer anywhere in
|
||||
the block layer (all non-negative integers are generally treated as
|
||||
complete success), we probably do not want to add more such cases.
|
||||
Therefore, fix this problem by making the vpc driver always return 0 in
|
||||
case of success.
|
||||
|
||||
Suggested-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Max Reitz <mreitz@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 1a37e3124407b5a145d44478d3ecbdb89c63789f)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
block/vpc.c | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/block/vpc.c b/block/vpc.c
|
||||
index d4776ee8a5..3a88e28e2b 100644
|
||||
--- a/block/vpc.c
|
||||
+++ b/block/vpc.c
|
||||
@@ -885,6 +885,7 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
|
||||
goto fail;
|
||||
}
|
||||
|
||||
+ ret = 0;
|
||||
fail:
|
||||
return ret;
|
||||
}
|
||||
@@ -908,7 +909,7 @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
|
||||
return ret;
|
||||
}
|
||||
|
||||
- return ret;
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
static int calculate_rounded_image_size(BlockdevCreateOptionsVpc *vpc_opts,
|
||||
--
|
||||
2.23.0
|
||||
60
x86-do-not-advertise-die-id-in-query-hotpluggbale-cp.patch
Normal file
60
x86-do-not-advertise-die-id-in-query-hotpluggbale-cp.patch
Normal file
@ -0,0 +1,60 @@
|
||||
From 725dfa851f8e1de8653f41a4bd38c7f98757eb40 Mon Sep 17 00:00:00 2001
|
||||
From: Igor Mammedov <imammedo@redhat.com>
|
||||
Date: Mon, 2 Sep 2019 08:02:22 -0400
|
||||
Subject: [PATCH] x86: do not advertise die-id in query-hotpluggbale-cpus if
|
||||
'-smp dies' is not set
|
||||
|
||||
Commit 176d2cda0 (i386/cpu: Consolidate die-id validity in smp context) added
|
||||
new 'die-id' topology property to CPUs and exposed it via QMP command
|
||||
query-hotpluggable-cpus, which broke -device/device_add cpu-foo for existing
|
||||
users that do not support die-id/dies yet. That would be fine if it happened
|
||||
to new machine type only but it also happened to old machine types,
|
||||
which breaks migration from old QEMU to the new one, for example following CLI:
|
||||
|
||||
OLD-QEMU -M pc-i440fx-4.0 -smp 1,max_cpus=2 \
|
||||
-device qemu64-x86_64-cpu,socket-id=1,core-id=0,thread-id
|
||||
is not able to start with new QEMU, complaining about invalid die-id.
|
||||
|
||||
After discovering the regression, the patch
|
||||
"pc: Don't make die-id mandatory unless necessary"
|
||||
makes die-id optional so old CLI would work.
|
||||
|
||||
However it's not enough as new QEMU still exposes die-id via query-hotpluggable-cpus
|
||||
QMP command, so the users that started old machine type on new QEMU, using all
|
||||
properties (including die-id) received from QMP command (as required), won't be
|
||||
able to start old QEMU using the same properties since it doesn't support die-id.
|
||||
|
||||
Fix it by hiding die-id in query-hotpluggable-cpus for all machine types in case
|
||||
'-smp dies' is not provided on CLI or '-smp dies = 1', in which case smp_dies == 1
|
||||
and APIC ID is calculated in default way (as it was before DIE support) so we won't
|
||||
need compat code as in both cases the topology provided to guest via CPUID is the same.
|
||||
|
||||
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
|
||||
Message-Id: <20190902120222.6179-1-imammedo@redhat.com>
|
||||
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
|
||||
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
|
||||
(cherry picked from commit c6c1bb89fb46f3b88f832e654cf5a6f7941aac51)
|
||||
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
---
|
||||
hw/i386/pc.c | 6 ++++--
|
||||
1 file changed, 4 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
||||
index 947f81070f..d011733ff7 100644
|
||||
--- a/hw/i386/pc.c
|
||||
+++ b/hw/i386/pc.c
|
||||
@@ -2887,8 +2887,10 @@ static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms)
|
||||
ms->smp.threads, &topo);
|
||||
ms->possible_cpus->cpus[i].props.has_socket_id = true;
|
||||
ms->possible_cpus->cpus[i].props.socket_id = topo.pkg_id;
|
||||
- ms->possible_cpus->cpus[i].props.has_die_id = true;
|
||||
- ms->possible_cpus->cpus[i].props.die_id = topo.die_id;
|
||||
+ if (pcms->smp_dies > 1) {
|
||||
+ ms->possible_cpus->cpus[i].props.has_die_id = true;
|
||||
+ ms->possible_cpus->cpus[i].props.die_id = topo.die_id;
|
||||
+ }
|
||||
ms->possible_cpus->cpus[i].props.has_core_id = true;
|
||||
ms->possible_cpus->cpus[i].props.core_id = topo.core_id;
|
||||
ms->possible_cpus->cpus[i].props.has_thread_id = true;
|
||||
--
|
||||
2.23.0
|
||||
Loading…
x
Reference in New Issue
Block a user