From 9cdd7c19a08c773f1f8a2d314bb94d61bd08fd77 Mon Sep 17 00:00:00 2001 From: libai Date: Thu, 27 Mar 2025 16:51:03 +0800 Subject: [PATCH] vdpa/iommufd: Introduce vdpa-iommufd module The purpose of the vdpa-iommufd module is to share DMA mappings among multiple vDPA devices through the kernel iommufd interface. vDPA devices can share the same DMA mapping by associating with the same IOMMUFD backend. This prevents vDPA devices from repeatedly establishing DMA mappings, reduces the time required for hot-plugging and unplugging vDPA devices, and minimizes duplicate IOMMU TLB entries. vDPA devices that need to be isolated can also be divided into different groups by associating them with different iommufds. Each iommufd backend is associated with a VDPAIOMMUFDContainer, which links it to multiple vDPA devices. To improve availability, even if a vDPA device encounters problems when sharing page tables, it can still complete DMA mapping by allocating a separate HWPT. Signed-off-by: libai --- hw/virtio/meson.build | 2 +- hw/virtio/vdpa-dev-iommufd.c | 294 +++++++++++++++++++++++++++ hw/virtio/vdpa-dev.c | 17 ++ include/hw/virtio/vdpa-dev-iommufd.h | 40 ++++ include/hw/virtio/vdpa-dev.h | 2 + linux-headers/linux/vhost.h | 28 +++ 6 files changed, 382 insertions(+), 1 deletion(-) create mode 100644 hw/virtio/vdpa-dev-iommufd.c create mode 100644 include/hw/virtio/vdpa-dev-iommufd.h diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 596651d113..67291563d3 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -5,7 +5,7 @@ system_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c') system_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) system_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK_COMMON', if_true: files('vhost-vsock-common.c')) system_virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c')) -system_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c', 
'vdpa-dev-mig.c')) +system_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c', 'vdpa-dev-mig.c', 'vdpa-dev-iommufd.c')) specific_virtio_ss = ss.source_set() specific_virtio_ss.add(files('virtio.c')) diff --git a/hw/virtio/vdpa-dev-iommufd.c b/hw/virtio/vdpa-dev-iommufd.c new file mode 100644 index 0000000000..d72f56d52f --- /dev/null +++ b/hw/virtio/vdpa-dev-iommufd.c @@ -0,0 +1,294 @@ +/* + * vhost vdpa device iommufd backend + * + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All Rights Reserved. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include <sys/ioctl.h> +#include <linux/vhost.h> +#include "qapi/error.h" +#include "hw/virtio/vdpa-dev-iommufd.h" + +static QLIST_HEAD(, VDPAIOMMUFDContainer) vdpa_container_list = + QLIST_HEAD_INITIALIZER(vdpa_container_list); + +static int vhost_vdpa_container_connect_iommufd(VDPAIOMMUFDContainer *container) +{ + IOMMUFDBackend *iommufd = container->iommufd; + uint32_t ioas_id; + Error *err = NULL; + + if (!iommufd) { + return -1; + } + + if (!iommufd_backend_connect(iommufd, &err)) { + error_report_err(err); + return -1; + } + + if (!iommufd_backend_alloc_ioas(iommufd, &ioas_id, &err)) { + error_report_err(err); + iommufd_backend_disconnect(iommufd); + return -1; + } + container->ioas_id = ioas_id; + return 0; +} + +static void vhost_vdpa_container_disconnect_iommufd(VDPAIOMMUFDContainer *container) +{ + IOMMUFDBackend *iommufd = container->iommufd; + uint32_t ioas_id = container->ioas_id; + + if (!iommufd) { + return; + } + + iommufd_backend_free_id(iommufd, ioas_id); + iommufd_backend_disconnect(iommufd); +} + +static IOMMUFDHWPT *vhost_vdpa_find_hwpt(VDPAIOMMUFDContainer *container, + VhostVdpaDevice *vdev) +{ + IOMMUFDHWPT *hwpt = NULL; + VhostVdpaDevice *tmp = NULL; + + QLIST_FOREACH(hwpt, &container->hwpt_list, next) { + QLIST_FOREACH(tmp, &hwpt->device_list, next) { + if (tmp == vdev) { + return hwpt; + } + } + } + + return NULL; +} + +static VDPAIOMMUFDContainer *vhost_vdpa_find_container(VhostVdpaDevice 
*vdev) +{ + VDPAIOMMUFDContainer *container = NULL; + + QLIST_FOREACH(container, &vdpa_container_list, next) { + if (container->iommufd == vdev->iommufd) { + return container; + } + } + + return NULL; +} + +static VDPAIOMMUFDContainer *vhost_vdpa_create_container(VhostVdpaDevice *vdev) +{ + VDPAIOMMUFDContainer *container = NULL; + + container = g_new0(VDPAIOMMUFDContainer, 1); + container->iommufd = vdev->iommufd; + QLIST_INIT(&container->hwpt_list); + + QLIST_INSERT_HEAD(&vdpa_container_list, container, next); + + return container; +} + +static void vhost_vdpa_destroy_container(VDPAIOMMUFDContainer *container) +{ + if (!container) { + return; + } + + container->iommufd = NULL; + QLIST_SAFE_REMOVE(container, next); + g_free(container); +} + +static void vhost_vdpa_device_unbind_iommufd(VhostVdpaDevice *vdev) +{ + int ret; + ret = ioctl(vdev->vhostfd, VHOST_VDPA_UNBIND_IOMMUFD, 0); + if (ret) { + qemu_log("vhost vdpa device unbind iommufd failed: %d, devid: %d\n", + ret, vdev->iommufd_devid); + } +} + +static int vhost_vdpa_device_bind_iommufd(VhostVdpaDevice *vdev) +{ + IOMMUFDBackend *iommufd = vdev->iommufd; + struct vdpa_dev_bind_iommufd bind = { + .iommufd = iommufd->fd, + .out_devid = -1, + }; + int ret; + + /* iommufd auto unbind when vdev->vhostfd close */ + ret = ioctl(vdev->vhostfd, VHOST_VDPA_BIND_IOMMUFD, &bind); + if (ret) { + qemu_log("vhost vdpa device bind iommufd failed: %d\n", ret); + return ret; + } + vdev->iommufd_devid = bind.out_devid; + return 0; +} + +static int vhost_vdpa_container_attach_device(VDPAIOMMUFDContainer *container, VhostVdpaDevice *vdev) +{ + IOMMUFDBackend *iommufd = NULL; + IOMMUFDHWPT *hwpt = NULL; + Error *err = NULL; + uint32_t pt_id; + int ret; + + if (!container || !container->iommufd || container->iommufd != vdev->iommufd) { + return -1; + } + + iommufd = container->iommufd; + + /* try to find an available hwpt */ + QLIST_FOREACH(hwpt, &container->hwpt_list, next) { + pt_id = hwpt->hwpt_id; + ret = ioctl(vdev->vhostfd, 
VHOST_VDPA_ATTACH_IOMMUFD_PT, &pt_id); + if (ret == 0) { + QLIST_INSERT_HEAD(&hwpt->device_list, vdev, next); + return 0; + } + } + + /* no available hwpt found in the container, create a new one */ + hwpt = g_new0(IOMMUFDHWPT, 1); + QLIST_INIT(&hwpt->device_list); + + if (!iommufd_backend_alloc_hwpt(iommufd, vdev->iommufd_devid, + container->ioas_id, 0, 0, 0, NULL, + &pt_id, NULL, &err)) { + error_report_err(err); + ret = -1; + goto free_mem; + } + + hwpt->hwpt_id = pt_id; + + ret = ioctl(vdev->vhostfd, VHOST_VDPA_ATTACH_IOMMUFD_PT, &pt_id); + if (ret) { + qemu_log("vhost vdpa device attach iommufd pt failed: %d\n", ret); + goto free_hwpt; + } + + QLIST_INSERT_HEAD(&hwpt->device_list, vdev, next); + QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next); + + return 0; + +free_hwpt: + iommufd_backend_free_id(iommufd, hwpt->hwpt_id); +free_mem: + g_free(hwpt); + return ret; +} + +static void vhost_vdpa_container_detach_device(VDPAIOMMUFDContainer *container, VhostVdpaDevice *vdev) +{ + IOMMUFDBackend *iommufd = vdev->iommufd; + IOMMUFDHWPT *hwpt = NULL; + + /* find the hwpt used by this device */ + hwpt = vhost_vdpa_find_hwpt(container, vdev); + if (!hwpt) { + return; + } + + ioctl(vdev->vhostfd, VHOST_VDPA_DETACH_IOMMUFD_PT, &hwpt->hwpt_id); + + QLIST_SAFE_REMOVE(vdev, next); + + /* No device is using this hwpt any more, free it */ + if (QLIST_EMPTY(&hwpt->device_list)) { + iommufd_backend_free_id(iommufd, hwpt->hwpt_id); + QLIST_SAFE_REMOVE(hwpt, next); + g_free(hwpt); + } +} + +int vhost_vdpa_attach_container(VhostVdpaDevice *vdev) +{ + VDPAIOMMUFDContainer *container = NULL; + IOMMUFDBackend *iommufd = vdev->iommufd; + bool new_container = false; + int ret = 0; + + if (!iommufd) { + return 0; + } + + container = vhost_vdpa_find_container(vdev); + if (!container) { + container = vhost_vdpa_create_container(vdev); + if (!container) { + qemu_log("vdpa create container failed\n"); + return -1; + } + ret = vhost_vdpa_container_connect_iommufd(container); + if (ret) { + 
qemu_log("vdpa container connect iommufd failed\n"); + goto destroy; + } + new_container = true; + } + + ret = vhost_vdpa_device_bind_iommufd(vdev); + if (ret) { + qemu_log("vdpa device bind iommufd failed\n"); + goto disconnect; + } + + ret = vhost_vdpa_container_attach_device(container, vdev); + if (ret) { + qemu_log("vdpa container attach device failed\n"); + goto unbind; + } + + return 0; + +unbind: + vhost_vdpa_device_unbind_iommufd(vdev); +disconnect: + if (!new_container) { + return ret; + } + vhost_vdpa_container_disconnect_iommufd(container); +destroy: + vhost_vdpa_destroy_container(container); + + return ret; +} + +void vhost_vdpa_detach_container(VhostVdpaDevice *vdev) +{ + VDPAIOMMUFDContainer *container = NULL; + IOMMUFDBackend *iommufd = vdev->iommufd; + + if (!iommufd) { + return; + } + + container = vhost_vdpa_find_container(vdev); + if (!container) { + return; + } + + vhost_vdpa_container_detach_device(container, vdev); + + vhost_vdpa_device_unbind_iommufd(vdev); + + if (!QLIST_EMPTY(&container->hwpt_list)) { + return; + } + /* No HWPT in this container, destroy it */ + vhost_vdpa_container_disconnect_iommufd(container); + + vhost_vdpa_destroy_container(container); +} \ No newline at end of file diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index 9ce7ed7eae..a6bd695724 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -32,6 +32,7 @@ #include "migration/migration.h" #include "exec/address-spaces.h" #include "standard-headers/linux/virtio_ids.h" +#include "hw/virtio/vdpa-dev-iommufd.h" static void vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) @@ -127,6 +128,16 @@ static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) goto free_vqs; } + /* If the vdpa device is associated with an iommufd, attach device to container */ + if (v->iommufd) { + ret = vhost_vdpa_attach_container(v); + if (ret < 0) { + error_setg(errp, "vhost vdpa device attach container failed: %s", + strerror(-ret)); + goto 
free_vqs; + } + } + memory_listener_register(&v->vdpa.listener, &address_space_memory); v->config_size = vhost_vdpa_device_get_u32(v->vhostfd, VHOST_VDPA_GET_CONFIG_SIZE, @@ -168,6 +179,9 @@ free_config: vhost_cleanup: memory_listener_unregister(&v->vdpa.listener); vhost_dev_cleanup(&v->dev); + if (v->iommufd) { + vhost_vdpa_detach_container(v); + } free_vqs: g_free(vqs); out: @@ -194,6 +208,9 @@ static void vhost_vdpa_device_unrealize(DeviceState *dev) g_free(s->dev.vqs); memory_listener_unregister(&s->vdpa.listener); vhost_dev_cleanup(&s->dev); + if (s->iommufd) { + vhost_vdpa_detach_container(s); + } qemu_close(s->vhostfd); s->vhostfd = -1; } diff --git a/include/hw/virtio/vdpa-dev-iommufd.h b/include/hw/virtio/vdpa-dev-iommufd.h new file mode 100644 index 0000000000..dc14d9dd15 --- /dev/null +++ b/include/hw/virtio/vdpa-dev-iommufd.h @@ -0,0 +1,40 @@ +/* + * vhost vDPA device support iommufd header + * + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All Rights Reserved. + */ + +#ifndef _VHOST_VDPA_IOMMUFD_H +#define _VHOST_VDPA_IOMMUFD_H + +#include "hw/virtio/vdpa-dev.h" + +/* + * A HW pagetable is called an iommu_domain inside the kernel. + * This user object allows directly creating and inspecting the + * domains. Domains that have kernel owned page tables will be + * associated with an iommufd_ioas that provides the IOVA to + * PFN map. 
+ */ +typedef struct IOMMUFDHWPT { + uint32_t hwpt_id; + QLIST_HEAD(, VhostVdpaDevice) device_list; + QLIST_ENTRY(IOMMUFDHWPT) next; +} IOMMUFDHWPT; + +typedef struct VDPAIOMMUFDContainer { + struct IOMMUFDBackend *iommufd; + uint32_t ioas_id; + QLIST_HEAD(, IOMMUFDHWPT) hwpt_list; + QLIST_ENTRY(VDPAIOMMUFDContainer) next; +} VDPAIOMMUFDContainer; + +struct vdpa_dev_bind_iommufd { + __s32 iommufd; + __u32 out_devid; +}; + +int vhost_vdpa_attach_container(VhostVdpaDevice *vdev); +void vhost_vdpa_detach_container(VhostVdpaDevice *vdev); + +#endif /* _VHOST_VDPA_IOMMUFD_H */ diff --git a/include/hw/virtio/vdpa-dev.h b/include/hw/virtio/vdpa-dev.h index accdb7fa28..872e630546 100644 --- a/include/hw/virtio/vdpa-dev.h +++ b/include/hw/virtio/vdpa-dev.h @@ -43,6 +43,8 @@ struct VhostVdpaDevice { VMChangeStateEntry *vmstate; Notifier migration_state; IOMMUFDBackend *iommufd; + uint32_t iommufd_devid; + QLIST_ENTRY(VhostVdpaDevice) next; }; #endif diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index a08e980a1e..f5c05abe8b 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -232,6 +232,34 @@ #define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ struct vhost_vring_state) +/* Bind a vDPA device to the specified iommufd + * + * After the return of this ioctl, the vDPA device is bound to the specified + * iommufd, and the device id is also returned. + */ +#define VHOST_VDPA_BIND_IOMMUFD _IO(VHOST_VIRTIO, 0x90) + +/* Unbind a vDPA device from the specified iommufd + * + * After the return of this ioctl, the vDPA device is unbound from the specified + * iommufd. + */ +#define VHOST_VDPA_UNBIND_IOMMUFD _IO(VHOST_VIRTIO, 0x91) + +/* Associate the vDPA device with an address space within the bound iommufd + * + * After the return of this ioctl, the vDPA device is attached to the bound + * iommufd. 
+ */ +#define VHOST_VDPA_ATTACH_IOMMUFD_PT _IO(VHOST_VIRTIO, 0x92) + +/* Detach the vDPA device from an address space within the bound iommufd. + * + * After the return of this ioctl, the vDPA device is detached from the address + * space within the bound iommufd. + */ +#define VHOST_VDPA_DETACH_IOMMUFD_PT _IO(VHOST_VIRTIO, 0x93) + /* set and get device buffer */ #define VHOST_GET_DEV_BUFFER _IOR(VHOST_VIRTIO, 0xb0, struct vhost_vdpa_config) #define VHOST_SET_DEV_BUFFER _IOW(VHOST_VIRTIO, 0xb1, struct vhost_vdpa_config) -- 2.41.0.windows.1