From 39db503d4d2b594f12ae17fc950c7b953b3a2ee8 Mon Sep 17 00:00:00 2001 From: Chen Qun Date: Wed, 29 Aug 2018 18:10:12 +0200 Subject: [PATCH] vfio: Set up nested stage mappings In nested mode, legacy vfio_iommu_map_notify cannot be used as there is no "caching" mode and we do not trap on map. On Intel, vfio_iommu_map_notify was used to DMA map the RAM through the host single stage. With nested mode, we need to set up the stage 2 and the stage 1 separately. This patch introduces a prereg_listener to set up the stage 2 mapping. The stage 1 mapping, owned by the guest, is passed to the host when the guest invalidates the stage 1 configuration, through a dedicated PCIPASIDOps callback. Guest IOTLB invalidations are cascaded down to the host through another IOMMU MR UNMAP notifier. Signed-off-by: Eric Auger Signed-off-by: Kunkun Jiang --- vfio-Set-up-nested-stage-mappings.patch | 277 ++++++++++++++++++++++++ 1 file changed, 277 insertions(+) create mode 100644 vfio-Set-up-nested-stage-mappings.patch diff --git a/vfio-Set-up-nested-stage-mappings.patch b/vfio-Set-up-nested-stage-mappings.patch new file mode 100644 index 0000000..66659e8 --- /dev/null +++ b/vfio-Set-up-nested-stage-mappings.patch @@ -0,0 +1,277 @@ +From a65c40f9d1025a9843dec38070d9f26792b00892 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 29 Aug 2018 18:10:12 +0200 +Subject: [PATCH] vfio: Set up nested stage mappings + +In nested mode, legacy vfio_iommu_map_notify cannot be used as +there is no "caching" mode and we do not trap on map. + +On Intel, vfio_iommu_map_notify was used to DMA map the RAM +through the host single stage. + +With nested mode, we need to set up the stage 2 and the stage 1 +separately. This patch introduces a prereg_listener to set up +the stage 2 mapping. + +The stage 1 mapping, owned by the guest, is passed to the host +when the guest invalidates the stage 1 configuration, through +a dedicated PCIPASIDOps callback. 
Guest IOTLB invalidations +are cascaded down to the host through another IOMMU MR UNMAP +notifier. + +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 136 +++++++++++++++++++++++++++++++++++++++++-- + hw/vfio/pci.c | 21 +++++++ + hw/vfio/trace-events | 2 + + 3 files changed, 154 insertions(+), 5 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 8837d33c57..cc50efdbc1 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -642,6 +642,73 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, + return true; + } + ++/* Propagate a guest IOTLB invalidation to the host (nested mode) */ ++static void vfio_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) ++{ ++ VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); ++ struct vfio_iommu_type1_cache_invalidate ustruct = {}; ++ VFIOContainer *container = giommu->container; ++ int ret; ++ ++ assert(iotlb->perm == IOMMU_NONE); ++ ++ ustruct.argsz = sizeof(ustruct); ++ ustruct.flags = 0; ++ ustruct.info.argsz = sizeof(struct iommu_cache_invalidate_info); ++ ustruct.info.version = IOMMU_CACHE_INVALIDATE_INFO_VERSION_1; ++ ustruct.info.cache = IOMMU_CACHE_INV_TYPE_IOTLB; ++ ++ switch (iotlb->granularity) { ++ case IOMMU_INV_GRAN_DOMAIN: ++ ustruct.info.granularity = IOMMU_INV_GRANU_DOMAIN; ++ break; ++ case IOMMU_INV_GRAN_PASID: ++ { ++ struct iommu_inv_pasid_info *pasid_info; ++ int archid = -1; ++ ++ pasid_info = &ustruct.info.granu.pasid_info; ++ ustruct.info.granularity = IOMMU_INV_GRANU_PASID; ++ if (iotlb->flags & IOMMU_INV_FLAGS_ARCHID) { ++ pasid_info->flags |= IOMMU_INV_ADDR_FLAGS_ARCHID; ++ archid = iotlb->arch_id; ++ } ++ pasid_info->archid = archid; ++ trace_vfio_iommu_asid_inv_iotlb(archid); ++ break; ++ } ++ case IOMMU_INV_GRAN_ADDR: ++ { ++ hwaddr start = iotlb->iova + giommu->iommu_offset; ++ struct iommu_inv_addr_info *addr_info; ++ size_t size = iotlb->addr_mask + 1; ++ int archid = -1; ++ ++ addr_info = 
&ustruct.info.granu.addr_info; ++ ustruct.info.granularity = IOMMU_INV_GRANU_ADDR; ++ if (iotlb->leaf) { ++ addr_info->flags |= IOMMU_INV_ADDR_FLAGS_LEAF; ++ } ++ if (iotlb->flags & IOMMU_INV_FLAGS_ARCHID) { ++ addr_info->flags |= IOMMU_INV_ADDR_FLAGS_ARCHID; ++ archid = iotlb->arch_id; ++ } ++ addr_info->archid = archid; ++ addr_info->addr = start; ++ addr_info->granule_size = size; ++ addr_info->nb_granules = 1; ++ trace_vfio_iommu_addr_inv_iotlb(archid, start, size, ++ 1, iotlb->leaf); ++ break; ++ } ++ } ++ ++ ret = ioctl(container->fd, VFIO_IOMMU_CACHE_INVALIDATE, &ustruct); ++ if (ret) { ++ error_report("%p: failed to invalidate CACHE (%d)", container, ret); ++ } ++} ++ + static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + { + VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); +@@ -823,6 +890,32 @@ static void vfio_dma_unmap_ram_section(VFIOContainer *container, + } + } + ++static void vfio_prereg_listener_region_add(MemoryListener *listener, ++ MemoryRegionSection *section) ++{ ++ VFIOContainer *container = ++ container_of(listener, VFIOContainer, prereg_listener); ++ ++ if (!memory_region_is_ram(section->mr)) { ++ return; ++ } ++ ++ vfio_dma_map_ram_section(container, section); ++} ++ ++static void vfio_prereg_listener_region_del(MemoryListener *listener, ++ MemoryRegionSection *section) ++{ ++ VFIOContainer *container = ++ container_of(listener, VFIOContainer, prereg_listener); ++ ++ if (!memory_region_is_ram(section->mr)) { ++ return; ++ } ++ ++ vfio_dma_unmap_ram_section(container, section); ++} ++ + static void vfio_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) + { +@@ -920,9 +1013,10 @@ static void vfio_listener_region_add(MemoryListener *listener, + memory_region_ref(section->mr); + + if (memory_region_is_iommu(section->mr)) { ++ IOMMUNotify notify; + VFIOGuestIOMMU *giommu; + IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); +- int iommu_idx; ++ int iommu_idx, flags; + + 
trace_vfio_listener_region_add_iommu(iova, end); + /* +@@ -941,15 +1035,27 @@ static void vfio_listener_region_add(MemoryListener *listener, + llend = int128_sub(llend, int128_one()); + iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr, + MEMTXATTRS_UNSPECIFIED); +- iommu_notifier_init(&giommu->n, vfio_iommu_map_notify, +- IOMMU_NOTIFIER_ALL, ++ ++ if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) { ++ /* IOTLB unmap notifier to propagate guest IOTLB invalidations */ ++ flags = IOMMU_NOTIFIER_UNMAP; ++ notify = vfio_iommu_unmap_notify; ++ } else { ++ /* MAP/UNMAP IOTLB notifier */ ++ flags = IOMMU_NOTIFIER_ALL; ++ notify = vfio_iommu_map_notify; ++ } ++ ++ iommu_notifier_init(&giommu->n, notify, flags, + section->offset_within_region, + int128_get64(llend), + iommu_idx); + QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next); + + memory_region_register_iommu_notifier(section->mr, &giommu->n); +- memory_region_iommu_replay(giommu->iommu, &giommu->n); ++ if (flags & IOMMU_NOTIFIER_MAP) { ++ memory_region_iommu_replay(giommu->iommu, &giommu->n); ++ } + + return; + } +@@ -1367,10 +1473,16 @@ static const MemoryListener vfio_memory_listener = { + .log_clear = vfio_listener_log_clear, + }; + ++static MemoryListener vfio_memory_prereg_listener = { ++ .region_add = vfio_prereg_listener_region_add, ++ .region_del = vfio_prereg_listener_region_del, ++}; ++ + static void vfio_listener_release(VFIOContainer *container) + { + memory_listener_unregister(&container->listener); +- if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { ++ if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || ++ container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) { + memory_listener_unregister(&container->prereg_listener); + } + } +@@ -1976,6 +2088,20 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + vfio_get_iommu_info_migration(container, info); + } + g_free(info); ++ ++ if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) { ++ 
container->prereg_listener = vfio_memory_prereg_listener; ++ memory_listener_register(&container->prereg_listener, ++ &address_space_memory); ++ if (container->error) { ++ memory_listener_unregister(&container->prereg_listener); ++ ret = container->error; ++ error_setg(errp, ++ "RAM memory listener initialization failed " ++ "for container"); ++ goto free_container_exit; ++ } ++ } + break; + } + case VFIO_SPAPR_TCE_v2_IOMMU: +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 3641ad0c5c..6c90ec9278 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2766,6 +2766,25 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) + vdev->req_enabled = false; + } + ++static int vfio_iommu_set_pasid_table(PCIBus *bus, int32_t devfn, ++ IOMMUConfig *config) ++{ ++ PCIDevice *pdev = bus->devices[devfn]; ++ VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); ++ VFIOContainer *container = vdev->vbasedev.group->container; ++ struct vfio_iommu_type1_set_pasid_table info; ++ ++ info.argsz = sizeof(info); ++ info.flags = VFIO_PASID_TABLE_FLAG_SET; ++ memcpy(&info.config, &config->pasid_cfg, sizeof(config->pasid_cfg)); ++ ++ return ioctl(container->fd, VFIO_IOMMU_SET_PASID_TABLE, &info); ++} ++ ++static PCIPASIDOps vfio_pci_pasid_ops = { ++ .set_pasid_table = vfio_iommu_set_pasid_table, ++}; ++ + static void vfio_realize(PCIDevice *pdev, Error **errp) + { + VFIOPCIDevice *vdev = PCI_VFIO(pdev); +@@ -3072,6 +3091,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + vfio_register_req_notifier(vdev); + vfio_setup_resetfn_quirk(vdev); + ++ pci_setup_pasid_ops(pdev, &vfio_pci_pasid_ops); ++ + return; + + out_teardown: +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 9b6c7ca61b..ee9a67d3ef 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -118,6 +118,8 @@ vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Devic + vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d 
[0x%lx - 0x%lx]" + vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%0x8" + vfio_dma_unmap_overflow_workaround(void) "" ++vfio_iommu_addr_inv_iotlb(int asid, uint64_t addr, uint64_t size, uint64_t nb_granules, bool leaf) "nested IOTLB invalidate asid=%d, addr=0x%"PRIx64" granule_size=0x%"PRIx64" nb_granules=0x%"PRIx64" leaf=%d" ++vfio_iommu_asid_inv_iotlb(int asid) "nested IOTLB invalidate asid=%d" + + # platform.c + vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group #%d" +-- +2.27.0 +