76 lines
3.1 KiB
Diff
76 lines
3.1 KiB
Diff
|
|
From 6c330f39cc08e4c641a3567e2b6ad0ebcadf5165 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Nicolin Chen <nicolinc@nvidia.com>
|
||
|
|
Date: Fri, 21 Jun 2024 21:22:04 +0000
|
||
|
|
Subject: [PATCH] hw/arm/smmu-common: Bypass emulated IOTLB for a nested SMMU
|
||
|
|
|
||
|
|
If a vSMMU is configured as a nested one, HW IOTLB will be used and all
|
||
|
|
cache invalidation should be done to the HW IOTLB too, rather than the emulated
|
||
|
|
iotlb. In this case, an iommu notifier isn't registered, as the devices
|
||
|
|
behind a nested SMMU would stay in the system address space for stage-2
|
||
|
|
mappings.
|
||
|
|
|
||
|
|
However, the KVM code still requests an iommu address space to translate
|
||
|
|
an MSI doorbell gIOVA via get_msi_address_space() and translate().
|
||
|
|
|
||
|
|
Since a nested SMMU doesn't register an iommu notifier to flush emulated
|
||
|
|
iotlb, bypass the emulated IOTLB and always walk through the guest-level
|
||
|
|
IO page table.
|
||
|
|
|
||
|
|
Note that a regular nested SMMU could still register an iommu notifier for
|
||
|
|
IOTLB invalidation, since QEMU traps the invalidation commands. But this
|
||
|
|
would result in invalidation inefficiency since each invalidation would
|
||
|
|
be doubled for both HW IOTLB and the emulated IOTLB. Also, with NVIDIA's
|
||
|
|
CMDQV feature on its Grace SoC, invalidation commands are issued to the
|
||
|
|
CMDQ HW directly, without any trapping. So, there is no way to maintain
|
||
|
|
the emulated IOTLB. Meanwhile, the stage-1 translation request from KVM
|
||
|
|
is only activated in case of an MSI table update, which does not happen
|
||
|
|
that often to impact performance if walking through the guest RAM every
|
||
|
|
time.
|
||
|
|
|
||
|
|
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
|
||
|
|
---
|
||
|
|
hw/arm/smmu-common.c | 20 ++++++++++++++++++++
|
||
|
|
1 file changed, 20 insertions(+)
|
||
|
|
|
||
|
|
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
|
||
|
|
index c5f3e02065..016418a48c 100644
|
||
|
|
--- a/hw/arm/smmu-common.c
|
||
|
|
+++ b/hw/arm/smmu-common.c
|
||
|
|
@@ -75,6 +75,16 @@ SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
|
||
|
|
uint8_t level = 4 - (inputsize - 4) / stride;
|
||
|
|
SMMUTLBEntry *entry = NULL;
|
||
|
|
|
||
|
|
+ /*
|
||
|
|
+ * Stage-1 translation with a nested SMMU in general uses HW IOTLB. However,
|
||
|
|
+ * KVM still requests for an iommu address space for an MSI fixup by looking
|
||
|
|
+ * up stage-1 page table. Make sure we don't go through the emulated pathway
|
||
|
|
+ * so that the emulated iotlb will not need any invalidation.
|
||
|
|
+ */
|
||
|
|
+ if (bs->nested) {
|
||
|
|
+ return NULL;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
while (level <= 3) {
|
||
|
|
uint64_t subpage_size = 1ULL << level_shift(level, tt->granule_sz);
|
||
|
|
uint64_t mask = subpage_size - 1;
|
||
|
|
@@ -110,6 +120,16 @@ void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *new)
|
||
|
|
SMMUIOTLBKey *key = g_new0(SMMUIOTLBKey, 1);
|
||
|
|
uint8_t tg = (new->granule - 10) / 2;
|
||
|
|
|
||
|
|
+ /*
|
||
|
|
+ * Stage-1 translation with a nested SMMU in general uses HW IOTLB. However,
|
||
|
|
+ * KVM still requests for an iommu address space for an MSI fixup by looking
|
||
|
|
+ * up stage-1 page table. Make sure we don't go through the emulated pathway
|
||
|
|
+ * so that the emulated iotlb will not need any invalidation.
|
||
|
|
+ */
|
||
|
|
+ if (bs->nested) {
|
||
|
|
+ return;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
if (g_hash_table_size(bs->iotlb) >= SMMU_IOTLB_MAX_SIZE) {
|
||
|
|
smmu_iotlb_inv_all(bs);
|
||
|
|
}
|
||
|
|
--
|
||
|
|
2.41.0.windows.1
|
||
|
|
|