qemu/vhost-Shadow-virtqueue-buffers-forwarding.patch

685 lines
22 KiB
Diff
Raw Normal View History

QEMU update to version 6.2.0-84(master) - hw/arm/fsl-imx: Do not ignore Error argument - hw/net/cadence_gem.c: spelling fixes: Octects - tests/qtest: check the return value - libvhost-user: Fix VHOST_USER_GET_MAX_MEM_SLOTS reply mainline inclusion commit 69a5daec06f423843ce1bb9be5fb049314996f78 category: bugfix - io_uring: fix short read slow path mainline inclusion commit c06fc7ce147e57ab493bad9263f1601b8298484b category: bugfix - libvhost-user: Fix VHOST_USER_ADD_MEM_REG reply mainline inclusion commit 7f27d20ded2f480f3e66d03f90ea71507b834276 category: bugfix - qsd: Unlink absolute PID file path mainline inclusion commit 9d8f8233b9fa525a7e37350fbc18877051128c5d category: bugfix - net: Fix a misleading error message - vdpa: stop all svq on device deletion - vhost: release virtqueue objects in error path - vhost: fix the fd leak - virtio: i2c: Check notifier helpers for VIRTIO_CONFIG_IRQ_IDX - hw/virtio: fix typo in VIRTIO_CONFIG_IRQ_IDX comments - virtio-net: clear guest_announce feature if no cvq backend - vdpa: fix VHOST_BACKEND_F_IOTLB_ASID flag check - vdpa: do not block migration if device has cvq and x-svq=on - vdpa net: block migration if the device has CVQ - vdpa: Return -EIO if device ack is VIRTIO_NET_ERR in _load_mq() - vdpa: Return -EIO if device ack is VIRTIO_NET_ERR in _load_mac() - vdpa: fix not using CVQ buffer in case of error - vdpa: Fix possible use-after-free for VirtQueueElement - hw/virtio: fix vhost_user_read tracepoint - vhost: Fix false positive out-of-bounds - vhost: fix possible wrap in SVQ descriptor ring - vhost: move iova_tree set to vhost_svq_start - vhost: Always store new kick fd on vhost_svq_set_svq_kick_fd - virtio-crypto: verify src&dst buffer length for sym request - vdpa: commit all host notifier MRs in a single MR transaction - vdpa: harden the error path if get_iova_range failed - vdpa-dev: get iova range explicitly - virtio-pci: add support for configure interrupt - virtio-mmio: add support for configure interrupt - 
virtio-net: add support for configure interrupt - vhost: add support for configure interrupt - virtio: add support for configure interrupt - vhost-vdpa: add support for config interrupt - vhost: introduce new VhostOps vhost_set_config_call - virtio-pci: decouple the single vector from the interrupt process - virtio-pci: decouple notifier from interrupt process - virtio: introduce macro VIRTIO_CONFIG_IRQ_IDX - vdpa: do not handle VIRTIO_NET_F_GUEST_ANNOUNCE in vhost-vdpa - vdpa: handle VIRTIO_NET_CTRL_ANNOUNCE in vhost_vdpa_net_handle_ctrl_avail - vhost: fix vq dirty bitmap syncing when vIOMMU is enabled - hw/virtio: gracefully handle unset vhost_dev vdev - hw/virtio/vhost: Fix typo in comment. - vdpa: always start CVQ in SVQ mode if possible - vdpa: add shadow_data to vhost_vdpa - vdpa: store x-svq parameter in VhostVDPAState - vdpa: add asid parameter to vhost_vdpa_dma_map/unmap - vdpa: allocate SVQ array unconditionally - vdpa: move SVQ vring features check to net/ - vdpa: request iova_range only once - vdpa: add vhost_vdpa_net_valid_svq_features - vhost: allocate SVQ device file descriptors at device start - vhost: set SVQ device call handler at SVQ start - vdpa: use v->shadow_vqs_enabled in vhost_vdpa_svqs_start & stop - vhost: enable vrings in vhost_dev_start() for vhost-user devices - vhost-vdpa: fix assert !virtio_net_get_subqueue(nc)->async_tx.elem in virtio_net_reset - net/vhost-vdpa.c: Fix clang compilation failure - vhost-vdpa: allow passing opened vhostfd to vhost-vdpa - vdpa: Remove shadow CVQ command check - vdpa: Delete duplicated vdpa_feature_bits entry - hw/virtio: add some vhost-user trace events - vdpa: Allow MQ feature in SVQ - virtio-net: Update virtio-net curr_queue_pairs in vdpa backends - vdpa: validate MQ CVQ commands - vdpa: Add vhost_vdpa_net_load_mq - vdpa: extract vhost_vdpa_net_load_mac from vhost_vdpa_net_load - vdpa: Make VhostVDPAState cvq_cmd_in_buffer control ack type - vdpa: Delete CVQ migration blocker - vdpa: Add virtio-net mac 
address via CVQ at start - vhost_net: add NetClientState->load() callback - vdpa: extract vhost_vdpa_net_cvq_add from vhost_vdpa_net_handle_ctrl_avail - vdpa: Move command buffers map to start of net device - vdpa: add net_vhost_vdpa_cvq_info NetClientInfo - vhost_net: Add NetClientInfo stop callback - vhost_net: Add NetClientInfo start callback - vdpa: Use ring hwaddr at vhost_vdpa_svq_unmap_ring - vdpa: Make SVQ vring unmapping return void - vdpa: Remove SVQ vring from iova_tree at shutdown - util: accept iova_tree_remove_parameter by value - vdpa: do not save failed dma maps in SVQ iova tree - vdpa: Skip the maps not in the iova tree - vdpa: Fix file descriptor leak on get features error - vdpa: Fix memory listener deletions of iova tree - vhost: Get vring base from vq, not svq - vdpa: Add x-svq to NetdevVhostVDPAOptions - vdpa: Add device migration blocker - vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs - vdpa: Buffer CVQ support on shadow virtqueue - vdpa: manual forward CVQ buffers - vdpa: Export vhost_vdpa_dma_map and unmap calls - vhost: Add svq avail_handler callback - vhost: add vhost_svq_poll - vhost: Expose vhost_svq_add - vhost: add vhost_svq_push_elem - vhost: Track number of descs in SVQDescState - vhost: Add SVQDescState - vhost: Decouple vhost_svq_add from VirtQueueElement - vhost: Check for queue full at vhost_svq_add - vhost: Move vhost_svq_kick call to vhost_svq_add - vhost: Reorder vhost_svq_kick - vdpa: Avoid compiler to squash reads to used idx - virtio-net: Expose ctrl virtqueue logic - virtio-net: Expose MAC_TABLE_ENTRIES - vhost: move descriptor translation to vhost_svq_vring_write_descs - util: Return void on iova_tree_remove - virtio-net: don't handle mq request in userspace handler for vhost-vdpa - vhost-vdpa: change name and polarity for vhost_vdpa_one_time_request() - vhost-vdpa: backend feature should set only once - vhost-vdpa: fix improper cleanup in net_init_vhost_vdpa - virtio-net: align ctrl_vq index for 
non-mq guest for vhost_vdpa - virtio: add vhost support for virtio devices - include/hw: start documenting the vhost API - hw/virtio: add vhost_user_[read|write] trace points - vhost: Fix element in vhost_svq_add failure - vdpa: Fix index calculus at vhost_vdpa_svqs_start - vdpa: Fix bad index calculus at vhost_vdpa_get_vring_base - vhost: Fix device's used descriptor dequeue - vhost: Track descriptor chain in private at SVQ - vdpa: Add missing tracing to batch mapping functions - vhost-vdpa: fix typo in a comment - virtio: fix --enable-vhost-user build on non-Linux - vdpa: Expose VHOST_F_LOG_ALL on SVQ - vdpa: Never set log_base addr if SVQ is enabled - vdpa: Adapt vhost_vdpa_get_vring_base to SVQ - vdpa: Add custom IOTLB translations to SVQ - vhost: Add VhostIOVATree - util: add iova_tree_find_iova - util: Add iova_tree_alloc_map - vhost: Shadow virtqueue buffers forwarding - vdpa: adapt vhost_ops callbacks to svq - virtio: Add vhost_svq_get_vring_addr - vhost: Add vhost_svq_valid_features to shadow vq - vhost: Add Shadow VirtQueue call forwarding capabilities - vhost: Add Shadow VirtQueue kick forwarding capabilities - vhost: Add VhostShadowVirtqueue - vdpa: Make ncs autofree - Revert "virtio: introduce macro IRTIO_CONFIG_IRQ_IDX" - Revert "virtio-pci: decouple notifier from interrupt process" - Revert "virtio-pci: decouple the single vector from the interrupt process" - Revert "vhost-vdpa: add support for config interrupt" - Revert "virtio: add support for configure interrupt" - Revert "vhost: add support for configure interrupt" - Revert "virtio-net: add support for configure interrupt" - Revert "virtio-mmio: add support for configure interrupt" - Revert "virtio-pci: add support for configure interrupt" - Revert "vhost: introduce new VhostOps vhost_set_config_call" - virtio: signal after wrapping packed used_idx - target/i386: Adjust feature level according to FEAT_7_1_EDX - target/i386: Add new CPU model GraniteRapids - target/i386: Add support for 
PREFETCHIT0/1 in CPUID enumeration - target/i386: Add support for AVX-NE-CONVERT in CPUID enumeration - target/i386: Add support for AVX-VNNI-INT8 in CPUID enumeration - target/i386: Add support for AVX-IFMA in CPUID enumeration - target/i386: Add support for AMX-FP16 in CPUID enumeration - target/i386: Add support for CMPCCXADD in CPUID enumeration - tracetool: avoid invalid escape in Python string - hw/pvrdma: Protect against buggy or malicious guest driver - vga: avoid crash if no default vga card mainline inclusion commit 6985d8ede92494f3b791de01e8ee9306eb6d5e4a category: bugfix - qom/object: Remove circular include dependency mainline inclusion commit 5bba9bcfbb42e7c016626420e148a1bf1b080835 category: bugfix - artist: set memory region owners for buffers to the artist device mainline inclusion commit 39fbaeca096a9bf6cbe2af88572c1cb2aa62aa8c category: bugfix - virtio-iommu: Fix the partial copy of probe request mainline inclusion commit 45461aace83d961e933b27519b81d17b4c690514 category: bugfix - e1000: set RX descriptor status in a separate operation mainline inclusion commit 034d00d4858161e1d4cff82d8d230bce874a04d3 category: bugfix - vhost: introduce new VhostOps vhost_set_config_call - vhost: stick to -errno error return convention - vhost-user: stick to -errno error return convention - vhost-vdpa: stick to -errno error return convention - virtio-pci: add support for configure interrupt - virtio-mmio: add support for configure interrupt - virtio-net: add support for configure interrupt - vhost: add support for configure interrupt - virtio: add support for configure interrupt - vhost-vdpa: add support for config interrupt - virtio-pci: decouple the single vector from the interrupt process - virtio-pci: decouple notifier from interrupt process - virtio: introduce macro IRTIO_CONFIG_IRQ_IDX - pci: Fix the update of interrupt disable bit in PCI_COMMAND register - hw/timer/npcm7xx_timer: Prevent timer from counting down past zero - tpm_crb: mark command buffer as 
dirty on request completion mainline inclusion commit e37a0ef4605e5d2041785ff3fc89ca6021faf7a0 category: bugfix - pci: fix overflow in snprintf string formatting mainline inclusion commit 36f18c6989a3d1ff1d7a0e50b0868ef3958299b4 category: bugfix - hw/usb/hcd-ehci: fix writeback order mainline inclusion commit f471e8b060798f26a7fc339c6152f82f22a7b33d category: bugfix - qemu-timer: Skip empty timer lists before locking in qemu_clock_deadline_ns_all mainline inclusion commit 3f42906c9ab2c777a895b48b87b8107167e4a275 category: bugfix - semihosting/config: Merge --semihosting-config option groups mainline inclusion commit 90c072e063737e9e8f431489bbd334452f89056e category: bugfix - semihosting: fix memleak at semihosting_arg_fallback - target/i386: Export GDS_NO bit to guests Signed-off-by: Jiabo Feng <fengjiabo1@huawei.com>
2023-11-28 15:57:41 +08:00
From bb7c23fd4979edaeeedf05e05cfed0d086430500 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= <eperezma@redhat.com>
Date: Mon, 14 Mar 2022 18:34:47 +0100
Subject: [PATCH] vhost: Shadow virtqueue buffers forwarding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Initial version of shadow virtqueue that actually forwards buffers. There
is no iommu support at the moment, and that will be addressed in future
patches of this series. Since all vhost-vdpa devices use forced IOMMU,
this means that SVQ is not usable at this point of the series on any
device.
For simplicity it only supports modern devices, which expect the vring
in little endian, with split ring and no event idx or indirect
descriptors. Support for them will not be added in this series.
It reuses the VirtQueue code for the device part. The driver part is
based on Linux's virtio_ring driver, but with stripped functionality
and optimizations so it's easier to review.
However, forwarding buffers has some particular pieces: One of the most
unexpected ones is that a guest's buffer can expand through more than
one descriptor in SVQ. While this is handled gracefully by qemu's
emulated virtio devices, it may cause unexpected SVQ queue full. This
patch also solves it by checking for this condition at both guest's
kicks and device's calls. The code may be more elegant in the future if
SVQ code runs in its own iocontext.
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: fangyi <eric.fangyi@huawei.com>
---
hw/virtio/vhost-shadow-virtqueue.c | 351 ++++++++++++++++++++++++++++-
hw/virtio/vhost-shadow-virtqueue.h | 26 +++
hw/virtio/vhost-vdpa.c | 155 ++++++++++++-
3 files changed, 521 insertions(+), 11 deletions(-)
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index 573ac0d9cf..46e94f0861 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -13,6 +13,7 @@
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qemu/main-loop.h"
+#include "qemu/log.h"
#include "linux-headers/linux/vhost.h"
/**
@@ -59,28 +60,307 @@ bool vhost_svq_valid_features(uint64_t features, Error **errp)
}
/**
- * Forward guest notifications.
+ * Number of descriptors that the SVQ can make available from the guest.
+ *
+ * @svq: The svq
+ */
+static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
+{
+ /*
+ * Ring size minus the entries in flight: those already made available
+ * (shadow_avail_idx) but not yet observed as used (shadow_used_idx).
+ */
+ return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx);
+}
+
+/**
+ * Write a run of descriptors into the SVQ descriptor table.
+ *
+ * Descriptors are taken starting at svq->free_head and following the free
+ * list stored in descs[].next. On return svq->free_head points to the entry
+ * that followed the last descriptor written.
+ *
+ * @svq: Shadow VirtQueue
+ * @iovec: Buffers to expose; iov_base / iov_len become the descriptor addr / len
+ * @num: Number of entries in @iovec. Nothing is written if it is zero.
+ * @more_descs: True if the chain continues after these descriptors, so the
+ * last one written must also carry VRING_DESC_F_NEXT
+ * @write: True to flag the descriptors with VRING_DESC_F_WRITE
+ */
+static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
+ const struct iovec *iovec, size_t num,
+ bool more_descs, bool write)
+{
+ uint16_t i = svq->free_head, last = svq->free_head;
+ unsigned n;
+ uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
+ vring_desc_t *descs = svq->vring.desc;
+
+ if (num == 0) {
+ return;
+ }
+
+ for (n = 0; n < num; n++) {
+ if (more_descs || (n + 1 < num)) {
+ descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
+ } else {
+ descs[i].flags = flags;
+ }
+ descs[i].addr = cpu_to_le64((hwaddr)(intptr_t)iovec[n].iov_base);
+ descs[i].len = cpu_to_le32(iovec[n].iov_len);
+
+ last = i;
+ /* next is stored little endian in the ring: convert ring -> CPU order */
+ i = le16_to_cpu(descs[i].next);
+ }
+
+ svq->free_head = le16_to_cpu(descs[last].next);
+}
+
+/**
+ * Expose one guest element to the device through the split SVQ vring.
+ *
+ * @svq: Shadow VirtQueue
+ * @elem: Guest element; its out_sg buffers are written first, then its in_sg
+ * buffers as device-writable descriptors
+ * @head: Set to svq->free_head as it was on entry, i.e. the head of the chain
+ *
+ * Returns false, after logging a guest error, if @elem has neither out nor in
+ * buffers. Returns true once the head has been published in the avail ring.
+ */
+static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
+ VirtQueueElement *elem, unsigned *head)
+{
+ unsigned avail_idx;
+ vring_avail_t *avail = svq->vring.avail;
+
+ *head = svq->free_head;
+
+ /* We need some descriptors here */
+ if (unlikely(!elem->out_num && !elem->in_num)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "Guest provided element with no descriptors");
+ return false;
+ }
+
+ /* Out buffers chain into the in buffers only if there are in buffers */
+ vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, elem->in_num > 0,
+ false);
+ vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
+
+ /*
+ * Put the entry in the available array (but don't update avail->idx until
+ * they do sync).
+ */
+ avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1);
+ avail->ring[avail_idx] = cpu_to_le16(*head);
+ svq->shadow_avail_idx++;
+
+ /* Update the avail index after writing the descriptor */
+ smp_wmb();
+ avail->idx = cpu_to_le16(svq->shadow_avail_idx);
+
+ return true;
+}
+
+/**
+ * Make a guest element available to the device and remember it.
+ *
+ * On success @elem is stored in svq->ring_id_maps at the index of its head
+ * descriptor, which is where vhost_svq_get_buf() looks it up by used id.
+ *
+ * Returns false if vhost_svq_add_split() rejected the element.
+ */
+static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
+{
+ unsigned qemu_head;
+ bool ok = vhost_svq_add_split(svq, elem, &qemu_head);
+ if (unlikely(!ok)) {
+ return false;
+ }
+
+ svq->ring_id_maps[qemu_head] = elem;
+ return true;
+}
+
+/**
+ * Notify the device through hdev_kick that buffers are available, unless the
+ * device has set VRING_USED_F_NO_NOTIFY in the used ring flags.
+ *
+ * @svq: Shadow VirtQueue
+ */
+static void vhost_svq_kick(VhostShadowVirtqueue *svq)
+{
+ /*
+ * We need to expose the available array entries before checking the used
+ * flags
+ */
+ smp_mb();
+ /*
+ * The ring stores flags little endian, so test against a little-endian
+ * constant, the same way the avail flags are handled in this file.
+ */
+ if (svq->vring.used->flags & cpu_to_le16(VRING_USED_F_NO_NOTIFY)) {
+ return;
+ }
+
+ event_notifier_set(&svq->hdev_kick);
+}
+
+/**
+ * Forward available buffers.
+ *
+ * @svq: Shadow VirtQueue
+ *
+ * Note that this function does not guarantee that all guest's available
+ * buffers are available to the device in SVQ avail ring. The guest may have
+ * exposed a GPA / GIOVA contiguous buffer, but it may not be contiguous in
+ * qemu vaddr.
+ *
+ * If that happens, guest's kick notifications will be disabled until the
+ * device uses some buffers.
+ */
+static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
+{
+ /* Clear event notifier */
+ event_notifier_test_and_clear(&svq->svq_kick);
+
+ /* Forward to the device as many available buffers as possible */
+ do {
+ /* No guest notifications are needed while the queue is being drained */
+ virtio_queue_set_notification(svq->vq, false);
+
+ while (true) {
+ VirtQueueElement *elem;
+ bool ok;
+
+ /* An element held back by a previous full SVQ goes first */
+ if (svq->next_guest_avail_elem) {
+ elem = g_steal_pointer(&svq->next_guest_avail_elem);
+ } else {
+ elem = virtqueue_pop(svq->vq, sizeof(*elem));
+ }
+
+ if (!elem) {
+ break;
+ }
+
+ if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) {
+ /*
+ * This condition is possible since a contiguous buffer in GPA
+ * does not imply a contiguous buffer in qemu's VA
+ * scatter-gather segments. If that happens, the buffer exposed
+ * to the device needs to be a chain of descriptors at this
+ * moment.
+ *
+ * SVQ cannot hold more available buffers if we are here:
+ * queue the current guest descriptor and ignore further kicks
+ * until some elements are used.
+ */
+ svq->next_guest_avail_elem = elem;
+ /* Returns with guest notifications still disabled */
+ return;
+ }
+
+ ok = vhost_svq_add(svq, elem);
+ if (unlikely(!ok)) {
+ /* VQ is broken, just return and ignore any other kicks */
+ return;
+ }
+ vhost_svq_kick(svq);
+ }
+
+ /*
+ * Re-enable notifications, then re-check the queue in the loop
+ * condition so buffers added in between are not missed.
+ */
+ virtio_queue_set_notification(svq->vq, true);
+ } while (!virtio_queue_empty(svq->vq));
+}
+
+/**
+ * Handle guest's kick.
*
* @n: guest kick event notifier, the one that guest set to notify svq.
*/
-static void vhost_handle_guest_kick(EventNotifier *n)
+static void vhost_handle_guest_kick_notifier(EventNotifier *n)
{
VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick);
event_notifier_test_and_clear(n);
- event_notifier_set(&svq->hdev_kick);
+ /* Forward the guest's buffers instead of just relaying the kick */
+ vhost_handle_guest_kick(svq);
+}
+
+/**
+ * Check whether the device has used buffers that SVQ has not consumed yet.
+ *
+ * The cached shadow_used_idx is checked first; the used index in the ring is
+ * only re-read when the cache shows nothing pending.
+ *
+ * @svq: Shadow VirtQueue
+ */
+static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
+{
+ if (svq->last_used_idx != svq->shadow_used_idx) {
+ return true;
+ }
+
+ /* used->idx is stored little endian in the ring: convert to CPU order */
+ svq->shadow_used_idx = le16_to_cpu(svq->vring.used->idx);
+
+ return svq->last_used_idx != svq->shadow_used_idx;
}
/**
- * Forward vhost notifications
+ * Enable vhost device calls after disabling them.
+ *
+ * @svq: The svq
+ *
+ * It returns false if there are pending used buffers from the vhost device,
+ * avoiding the possible races between SVQ checking for more work and enabling
+ * callbacks. True if SVQ used vring has no more pending buffers.
+ */
+static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq)
+{
+ /* Clearing NO_INTERRUPT in the avail flags asks the device to call again */
+ svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
+ /* Make sure the flag is written before the read of used_idx */
+ smp_mb();
+ return !vhost_svq_more_used(svq);
+}
+
+/**
+ * Set VRING_AVAIL_F_NO_INTERRUPT in the SVQ avail ring flags.
+ *
+ * @svq: The svq
+ */
+static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
+{
+ svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
+}
+
+/**
+ * Fetch the next element the device has marked as used.
+ *
+ * @svq: Shadow VirtQueue
+ * @len: Set to the length reported by the device in the used ring entry
+ *
+ * Returns the guest element stored for the used id, handing ownership to the
+ * caller, or NULL if nothing is pending or the device reported an id that is
+ * out of range or was never made available. On success the whole descriptor
+ * chain of the element is returned to the free list.
+ */
+static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
+ uint32_t *len)
+{
+ vring_desc_t *descs = svq->vring.desc;
+ const vring_used_t *used = svq->vring.used;
+ vring_used_elem_t used_elem;
+ uint16_t last_used, last_desc;
+ unsigned ndescs;
+
+ if (!vhost_svq_more_used(svq)) {
+ return NULL;
+ }
+
+ /* Only get used array entries after they have been exposed by dev */
+ smp_rmb();
+ last_used = svq->last_used_idx & (svq->vring.num - 1);
+ used_elem.id = le32_to_cpu(used->ring[last_used].id);
+ used_elem.len = le32_to_cpu(used->ring[last_used].len);
+
+ svq->last_used_idx++;
+ if (unlikely(used_elem.id >= svq->vring.num)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used",
+ svq->vdev->name, used_elem.id);
+ return NULL;
+ }
+
+ if (unlikely(!svq->ring_id_maps[used_elem.id])) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "Device %s says index %u is used, but it was not available",
+ svq->vdev->name, used_elem.id);
+ return NULL;
+ }
+
+ /*
+ * The element took out_num + in_num descriptors, linked through next when
+ * it was added. Walk to the last one so the whole chain, not only its
+ * head, goes back to the free list.
+ */
+ ndescs = svq->ring_id_maps[used_elem.id]->out_num +
+ svq->ring_id_maps[used_elem.id]->in_num;
+ last_desc = used_elem.id;
+ for (unsigned n = 1; n < ndescs; n++) {
+ last_desc = le16_to_cpu(descs[last_desc].next);
+ }
+
+ /* next is stored little endian in the ring */
+ descs[last_desc].next = cpu_to_le16(svq->free_head);
+ svq->free_head = used_elem.id;
+
+ *len = used_elem.len;
+ return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
+}
+
+/**
+ * Forward buffers used by the device to the guest's virtqueue.
+ *
+ * @svq: Shadow VirtQueue
+ * @check_for_avail_queue: If true, an element held in next_guest_avail_elem
+ * is retried through vhost_handle_guest_kick() after used buffers have been
+ * returned to the guest.
+ */
+static void vhost_svq_flush(VhostShadowVirtqueue *svq,
+ bool check_for_avail_queue)
+{
+ VirtQueue *vq = svq->vq;
+
+ /* Forward as many used buffers as possible. */
+ do {
+ unsigned i = 0;
+
+ vhost_svq_disable_notification(svq);
+ while (true) {
+ uint32_t len;
+ /* The element is freed at the end of each iteration */
+ g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
+ if (!elem) {
+ break;
+ }
+
+ if (unlikely(i >= svq->vring.num)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "More than %u used buffers obtained in a %u size SVQ",
+ i, svq->vring.num);
+ virtqueue_fill(vq, elem, len, i);
+ virtqueue_flush(vq, i);
+ return;
+ }
+ virtqueue_fill(vq, elem, len, i++);
+ }
+
+ /* Publish the batch to the guest, then signal the guest's call notifier */
+ virtqueue_flush(vq, i);
+ event_notifier_set(&svq->svq_call);
+
+ if (check_for_avail_queue && svq->next_guest_avail_elem) {
+ /*
+ * Avail ring was full when vhost_svq_flush was called, so it's a
+ * good moment to make more descriptors available if possible.
+ */
+ vhost_handle_guest_kick(svq);
+ }
+ /* Loop again if the device used more buffers before calls were re-enabled */
+ } while (!vhost_svq_enable_notification(svq));
+}
+
+/**
+ * Forward used buffers.
*
* @n: hdev call event notifier, the one that device set to notify svq.
+ *
+ * Note that we are not making any buffers available in the loop, there is no
+ * way that it runs more than virtqueue size times.
*/
static void vhost_svq_handle_call(EventNotifier *n)
{
VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
hdev_call);
event_notifier_test_and_clear(n);
- event_notifier_set(&svq->svq_call);
+ /* Forward used buffers; true also retries a held-back guest element */
+ vhost_svq_flush(svq, true);
}
/**
@@ -161,7 +441,41 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
if (poll_start) {
event_notifier_init_fd(svq_kick, svq_kick_fd);
event_notifier_set(svq_kick);
- event_notifier_set_handler(svq_kick, vhost_handle_guest_kick);
+ event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier);
+ }
+}
+
+/**
+ * Start the shadow virtqueue operation.
+ *
+ * @svq: Shadow Virtqueue
+ * @vdev: VirtIO device
+ * @vq: Virtqueue to shadow
+ */
+void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
+ VirtQueue *vq)
+{
+ size_t desc_size, driver_size, device_size;
+
+ /* Reset the forwarding state */
+ svq->next_guest_avail_elem = NULL;
+ svq->shadow_avail_idx = 0;
+ svq->shadow_used_idx = 0;
+ svq->last_used_idx = 0;
+ svq->vdev = vdev;
+ svq->vq = vq;
+
+ /* The shadow vring has the same number of entries as the guest's queue */
+ svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq));
+ driver_size = vhost_svq_driver_area_size(svq);
+ device_size = vhost_svq_device_area_size(svq);
+ /* Driver area: descriptor table followed directly by the avail ring */
+ svq->vring.desc = qemu_memalign(qemu_real_host_page_size, driver_size);
+ desc_size = sizeof(vring_desc_t) * svq->vring.num;
+ svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size);
+ memset(svq->vring.desc, 0, driver_size);
+ /* Device area: the used ring, in its own page-aligned allocation */
+ svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size);
+ memset(svq->vring.used, 0, device_size);
+ svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num);
+ /* Build the initial free list: each descriptor links to the following one */
+ for (unsigned i = 0; i < svq->vring.num - 1; i++) {
+ svq->vring.desc[i].next = cpu_to_le16(i + 1);
}
}
@@ -172,6 +486,31 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
void vhost_svq_stop(VhostShadowVirtqueue *svq)
{
event_notifier_set_handler(&svq->svq_kick, NULL);
+ g_autofree VirtQueueElement *next_avail_elem = NULL;
+
+ /* svq->vq is NULL when the SVQ is not started: nothing else to release */
+ if (!svq->vq) {
+ return;
+ }
+
+ /* Send all pending used descriptors to guest */
+ vhost_svq_flush(svq, false);
+
+ /* Detach and free the elements the device never returned */
+ for (unsigned i = 0; i < svq->vring.num; ++i) {
+ g_autofree VirtQueueElement *elem = NULL;
+ elem = g_steal_pointer(&svq->ring_id_maps[i]);
+ if (elem) {
+ virtqueue_detach_element(svq->vq, elem, 0);
+ }
+ }
+
+ /* Same for an element that was popped but never fit in the SVQ */
+ next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem);
+ if (next_avail_elem) {
+ virtqueue_detach_element(svq->vq, next_avail_elem, 0);
+ }
+ svq->vq = NULL;
+ g_free(svq->ring_id_maps);
+ /* vring.avail lives inside the vring.desc allocation, so two frees suffice */
+ qemu_vfree(svq->vring.desc);
+ qemu_vfree(svq->vring.used);
}
/**
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index 82cea1c3fa..38b3b91ca7 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -36,6 +36,30 @@ typedef struct VhostShadowVirtqueue {
/* Guest's call notifier, where the SVQ calls guest. */
EventNotifier svq_call;
+
+ /* Virtio queue shadowing */
+ VirtQueue *vq;
+
+ /* Virtio device */
+ VirtIODevice *vdev;
+
+ /* Map for use the guest's descriptors */
+ VirtQueueElement **ring_id_maps;
+
+ /* Next VirtQueue element that guest made available */
+ VirtQueueElement *next_guest_avail_elem;
+
+ /* Next head to expose to the device */
+ uint16_t shadow_avail_idx;
+
+ /* Next free descriptor */
+ uint16_t free_head;
+
+ /* Last seen used idx */
+ uint16_t shadow_used_idx;
+
+ /* Next head to consume from the device */
+ uint16_t last_used_idx;
} VhostShadowVirtqueue;
bool vhost_svq_valid_features(uint64_t features, Error **errp);
@@ -47,6 +71,8 @@ void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq);
size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq);
+void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
+ VirtQueue *vq);
void vhost_svq_stop(VhostShadowVirtqueue *svq);
VhostShadowVirtqueue *vhost_svq_new(void);
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 2f0e6a9bef..db34f26246 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -779,9 +779,9 @@ static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
* Note that this function does not rewind kick file descriptor if cannot set
* call one.
*/
-static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
- VhostShadowVirtqueue *svq, unsigned idx,
- Error **errp)
+static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
+ VhostShadowVirtqueue *svq, unsigned idx,
+ Error **errp)
{
struct vhost_vring_file file = {
.index = dev->vq_index + idx,
@@ -793,7 +793,7 @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
r = vhost_vdpa_set_vring_dev_kick(dev, &file);
if (unlikely(r != 0)) {
error_setg_errno(errp, -r, "Can't set device kick fd");
- return false;
+ return r;
}
event_notifier = &svq->hdev_call;
@@ -803,6 +803,95 @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
error_setg_errno(errp, -r, "Can't set device call fd");
}
+ return r;
+}
+
+/**
+ * Unmap a SVQ area in the device
+ */
+static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr iova,
+ hwaddr size)
+{
+ int r;
+
+ /* The size is rounded up to a whole number of host pages before unmapping */
+ size = ROUND_UP(size, qemu_real_host_page_size);
+ r = vhost_vdpa_dma_unmap(v, iova, size);
+ return r == 0;
+}
+
+/**
+ * Unmap the driver and device areas of a shadow virtqueue from the device.
+ *
+ * @dev: The vhost device
+ * @svq: The shadow virtqueue
+ *
+ * Returns false as soon as one unmap fails; if the driver area cannot be
+ * unmapped, the device area is not attempted.
+ */
+static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
+ const VhostShadowVirtqueue *svq)
+{
+ struct vhost_vdpa *v = dev->opaque;
+ struct vhost_vring_addr svq_addr;
+ size_t device_size = vhost_svq_device_area_size(svq);
+ size_t driver_size = vhost_svq_driver_area_size(svq);
+ bool ok;
+
+ vhost_svq_get_vring_addr(svq, &svq_addr);
+
+ ok = vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr, driver_size);
+ if (unlikely(!ok)) {
+ return false;
+ }
+
+ return vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr, device_size);
+}
+
+/**
+ * Map the shadow virtqueue rings in the device
+ *
+ * @dev: The vhost device
+ * @svq: The shadow virtqueue
+ * @addr: Assigned IOVA addresses
+ * @errp: Error pointer
+ */
+static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
+ const VhostShadowVirtqueue *svq,
+ struct vhost_vring_addr *addr,
+ Error **errp)
+{
+ struct vhost_vdpa *v = dev->opaque;
+ size_t device_size = vhost_svq_device_area_size(svq);
+ size_t driver_size = vhost_svq_driver_area_size(svq);
+ int r;
+
+ ERRP_GUARD();
+ vhost_svq_get_vring_addr(svq, addr);
+
+ /* The rings are mapped using their qemu virtual address as the IOVA */
+ r = vhost_vdpa_dma_map(v, addr->desc_user_addr, driver_size,
+ (void *)(uintptr_t)addr->desc_user_addr, true);
+ if (unlikely(r != 0)) {
+ /* error_setg_errno() appends ": <strerror>" by itself */
+ error_setg_errno(errp, -r, "Cannot create vq driver region");
+ return false;
+ }
+
+ r = vhost_vdpa_dma_map(v, addr->used_user_addr, device_size,
+ (void *)(uintptr_t)addr->used_user_addr, false);
+ if (unlikely(r != 0)) {
+ error_setg_errno(errp, -r, "Cannot create vq device region");
+ /*
+ * Undo the driver area mapping: the caller frees the rings on failure
+ * without unmapping them, so it must not stay mapped in the device.
+ */
+ vhost_vdpa_svq_unmap_ring(v, addr->desc_user_addr, driver_size);
+ }
+
+ return r == 0;
+}
+
+/**
+ * Prepare the device side of one shadow virtqueue: set its vring base and
+ * then its kick / call file descriptors.
+ *
+ * @dev: The vhost device
+ * @svq: The shadow virtqueue
+ * @idx: Index of the virtqueue within @dev
+ * @errp: Error pointer
+ *
+ * Returns true on success.
+ */
+static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
+ VhostShadowVirtqueue *svq, unsigned idx,
+ Error **errp)
+{
+ uint16_t vq_index = dev->vq_index + idx;
+ /* Only .index is set, so the vring base value sent to the device is 0 */
+ struct vhost_vring_state s = {
+ .index = vq_index,
+ };
+ int r;
+
+ r = vhost_vdpa_set_dev_vring_base(dev, &s);
+ if (unlikely(r)) {
+ error_setg_errno(errp, -r, "Cannot set vring base");
+ return false;
+ }
+
+ r = vhost_vdpa_svq_set_fds(dev, svq, idx, errp);
return r == 0;
}
@@ -817,10 +906,62 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
}
for (i = 0; i < v->shadow_vqs->len; ++i) {
+ VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
+ struct vhost_vring_addr addr = {
+ .index = i,
+ };
+ int r;
bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
if (unlikely(!ok)) {
- error_reportf_err(err, "Cannot setup SVQ %u: ", i);
+ goto err;
+ }
+
+ vhost_svq_start(svq, dev->vdev, vq);
+ ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err);
+ if (unlikely(!ok)) {
+ goto err_map;
+ }
+
+ /* Override vring GPA set by vhost subsystem */
+ r = vhost_vdpa_set_vring_dev_addr(dev, &addr);
+ if (unlikely(r != 0)) {
+ error_setg_errno(&err, -r, "Cannot set device address");
+ goto err_set_addr;
+ }
+ }
+
+ return true;
+
+err_set_addr:
+ vhost_vdpa_svq_unmap_rings(dev, g_ptr_array_index(v->shadow_vqs, i));
+
+err_map:
+ vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i));
+
+err:
+ error_reportf_err(err, "Cannot setup SVQ %u: ", i);
+ for (unsigned j = 0; j < i; ++j) {
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, j);
+ vhost_vdpa_svq_unmap_rings(dev, svq);
+ vhost_svq_stop(svq);
+ }
+
+ return false;
+}
+
+static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
+{
+ struct vhost_vdpa *v = dev->opaque;
+
+ if (!v->shadow_vqs) {
+ return true;
+ }
+
+ for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
+ bool ok = vhost_vdpa_svq_unmap_rings(dev, svq);
+ if (unlikely(!ok)) {
return false;
}
}
@@ -842,6 +983,10 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
}
vhost_vdpa_set_vring_ready(dev);
} else {
+ ok = vhost_vdpa_svqs_stop(dev);
+ if (unlikely(!ok)) {
+ return -1;
+ }
vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
}
--
2.27.0