From 6c330f39cc08e4c641a3567e2b6ad0ebcadf5165 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 21 Jun 2024 21:22:04 +0000 Subject: [PATCH] hw/arm/smmu-common: Bypass emulated IOTLB for a nested SMMU If a vSMMU is configured as a nested one, the HW IOTLB will be used and all cache invalidation should be done to the HW IOTLB too, instead of the emulated IOTLB. In this case, an iommu notifier isn't registered, as the devices behind a nested SMMU would stay in the system address space for stage-2 mappings. However, the KVM code still requests an iommu address space to translate an MSI doorbell gIOVA via get_msi_address_space() and translate(). Since a nested SMMU doesn't register an iommu notifier to flush the emulated IOTLB, bypass the emulated IOTLB and always walk through the guest-level IO page table. Note that a regular nested SMMU could still register an iommu notifier for IOTLB invalidation, since QEMU traps the invalidation commands. But this would result in invalidation inefficiency, since each invalidation would be doubled for both the HW IOTLB and the emulated IOTLB. Also, with NVIDIA's CMDQV feature on its Grace SoC, invalidation commands are issued to the CMDQ HW directly, without any trapping. So, there is no way to maintain the emulated IOTLB. Meanwhile, the stage-1 translation request from KVM is only activated in case of an MSI table update, which does not happen often enough for walking through the guest RAM every time to impact performance. Signed-off-by: Nicolin Chen --- hw/arm/smmu-common.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c index c5f3e02065..016418a48c 100644 --- a/hw/arm/smmu-common.c +++ b/hw/arm/smmu-common.c @@ -75,6 +75,16 @@ SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, uint8_t level = 4 - (inputsize - 4) / stride; SMMUTLBEntry *entry = NULL; + /* + * Stage-1 translation with a nested SMMU in general uses HW IOTLB. 
However, + * KVM still requests for an iommu address space for an MSI fixup by looking + * up stage-1 page table. Make sure we don't go through the emulated pathway + * so that the emulated iotlb will not need any invalidation. + */ + if (bs->nested) { + return NULL; + } + while (level <= 3) { uint64_t subpage_size = 1ULL << level_shift(level, tt->granule_sz); uint64_t mask = subpage_size - 1; @@ -110,6 +120,16 @@ void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *new) SMMUIOTLBKey *key = g_new0(SMMUIOTLBKey, 1); uint8_t tg = (new->granule - 10) / 2; + /* + * Stage-1 translation with a nested SMMU in general uses HW IOTLB. However, + * KVM still requests for an iommu address space for an MSI fixup by looking + * up stage-1 page table. Make sure we don't go through the emulated pathway + * so that the emulated iotlb will not need any invalidation. + */ + if (bs->nested) { + return; + } + if (g_hash_table_size(bs->iotlb) >= SMMU_IOTLB_MAX_SIZE) { smmu_iotlb_inv_all(bs); } -- 2.41.0.windows.1