!1097 [sync] PR-1091: QEMU update to version 8.2.0-30:

From: @openeuler-sync-bot 
Reviewed-by: @imxcc 
Signed-off-by: @imxcc
This commit is contained in:
openeuler-ci-bot 2025-05-14 09:10:44 +00:00 committed by Gitee
commit 0885db8f90
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
174 changed files with 20370 additions and 1 deletions

View File

@ -0,0 +1,43 @@
From 458d90e226d5833661f9257f6af57c14f9b9bdfe Mon Sep 17 00:00:00 2001
From: gongchangsui <gongchangsui@outlook.com>
Date: Mon, 17 Mar 2025 02:52:21 -0400
Subject: [PATCH] BUGFIX: Enforce isolation for virtcca_shared_hugepage
Add memory isolation enforcement when virtcca hugepage is disabled.
Signed-off-by: gongchangsui <gongchangsui@outlook.com>
---
hw/core/numa.c | 3 ++-
hw/virtio/vhost.c | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/hw/core/numa.c b/hw/core/numa.c
index e7c48dab61..c691578ef5 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -728,7 +728,8 @@ void numa_complete_configuration(MachineState *ms)
memory_region_init(ms->ram, OBJECT(ms), mc->default_ram_id,
ms->ram_size);
numa_init_memdev_container(ms, ms->ram);
- if (virtcca_cvm_enabled() && virtcca_shared_hugepage->ram_block) {
+ if (virtcca_cvm_enabled() && virtcca_shared_hugepage &&
+ virtcca_shared_hugepage->ram_block) {
virtcca_shared_memory_configuration(ms);
}
}
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 8b95558013..4bf0b03977 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1617,7 +1617,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
hdev->log_size = 0;
hdev->log_enabled = false;
hdev->started = false;
- if (virtcca_cvm_enabled()) {
+ if (virtcca_cvm_enabled() && virtcca_shared_hugepage && virtcca_shared_hugepage->ram_block) {
memory_listener_register(&hdev->memory_listener,
&address_space_virtcca_shared_memory);
} else {
--
2.41.0.windows.1

Binary file not shown.

View File

@ -0,0 +1,93 @@
From 53a82c6a5a22bb41e9bd3f754479baf4ce0845bf Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
Date: Mon, 5 Aug 2024 09:29:00 +0800
Subject: [PATCH] HostIOMMUDevice: Introduce realize_late callback
Previously we have a realize() callback which is called before attachment.
But there are still some elements e.g., ioas not ready before attachment.
So we need a realize_late() callback to further initialize them.
Currently, this callback is only useful for iommufd backend. For legacy
backend nothing needs to be initialized after attachment.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
hw/vfio/common.c | 18 +++++++++++++++---
include/sysemu/host_iommu_device.h | 17 +++++++++++++++++
2 files changed, 32 insertions(+), 3 deletions(-)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index a8bc1c6055..0be63c5fbc 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1654,6 +1654,7 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev,
const VFIOIOMMUClass *ops =
VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
HostIOMMUDevice *hiod = NULL;
+ HostIOMMUDeviceClass *hiod_ops = NULL;
int ret;
if (vbasedev->iommufd) {
@@ -1664,17 +1665,28 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev,
if (!vbasedev->mdev) {
hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
+ hiod_ops = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
vbasedev->hiod = hiod;
}
ret = ops->attach_device(name, vbasedev, as, errp);
if (ret) {
- object_unref(hiod);
- vbasedev->hiod = NULL;
- return ret;
+ goto err_attach;
+ }
+
+ if (hiod_ops && hiod_ops->realize_late &&
+ !hiod_ops->realize_late(hiod, vbasedev, errp)) {
+ ops->detach_device(vbasedev);
+ ret = -EINVAL;
+ goto err_attach;
}
return 0;
+
+err_attach:
+ object_unref(hiod);
+ vbasedev->hiod = NULL;
+ return ret;
}
void vfio_detach_device(VFIODevice *vbasedev)
diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h
index e4d8300350..84131f5495 100644
--- a/include/sysemu/host_iommu_device.h
+++ b/include/sysemu/host_iommu_device.h
@@ -64,6 +64,23 @@ struct HostIOMMUDeviceClass {
* Returns: true on success, false on failure.
*/
bool (*realize)(HostIOMMUDevice *hiod, void *opaque, Error **errp);
+ /**
+ * @realize_late: initialize host IOMMU device instance after attachment,
+ * some elements e.g., ioas are ready only after attachment.
+ * This callback initializes them.
+ *
+ * Optional callback.
+ *
+ * @hiod: pointer to a host IOMMU device instance.
+ *
+ * @opaque: pointer to agent device of this host IOMMU device,
+ * e.g., VFIO base device or VDPA device.
+ *
+ * @errp: pass an Error out when realize fails.
+ *
+ * Returns: true on success, false on failure.
+ */
+ bool (*realize_late)(HostIOMMUDevice *hiod, void *opaque, Error **errp);
/**
* @get_cap: check if a host IOMMU device capability is supported.
*
--
2.41.0.windows.1

View File

@ -0,0 +1,57 @@
From 35f33bf18826286c9e9fc739a893b9915c71f43c Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Fri, 14 Jun 2024 11:52:51 +0200
Subject: [PATCH] HostIOMMUDevice: Store the VFIO/VDPA agent
Store the agent device (VFIO or VDPA) in the host IOMMU device.
This will allow easy access to some of its resources.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/vfio/container.c | 1 +
hw/vfio/iommufd.c | 2 ++
include/sysemu/host_iommu_device.h | 1 +
3 files changed, 4 insertions(+)
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 10f7635425..8a5a112b6b 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -1259,6 +1259,7 @@ static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
hiod->name = g_strdup(vdev->name);
hiod->caps.aw_bits = vfio_device_get_aw_bits(vdev);
+ hiod->agent = opaque;
return true;
}
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 3b75cba26c..7a069ca576 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -735,6 +735,8 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
} data;
uint64_t hw_caps;
+ hiod->agent = opaque;
+
if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid,
&type, &data, sizeof(data),
&hw_caps, errp)) {
diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h
index a57873958b..3e5f058e7b 100644
--- a/include/sysemu/host_iommu_device.h
+++ b/include/sysemu/host_iommu_device.h
@@ -34,6 +34,7 @@ struct HostIOMMUDevice {
Object parent_obj;
char *name;
+ void *agent; /* pointer to agent device, ie. VFIO or VDPA device */
HostIOMMUDeviceCaps caps;
};
--
2.41.0.windows.1

View File

@ -0,0 +1,32 @@
From c0717e82e34f96af456309b3786a6808e8e324e4 Mon Sep 17 00:00:00 2001
From: huangyan <huangyan@cdjrlc.com>
Date: Wed, 16 Apr 2025 00:43:27 +0800
Subject: [PATCH] Revert "linux-user: Print tid not pid with strace"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This reverts commit 2f37362de1d971cc90c35405705bfa22a33f6cd8.
* this change is incomplete, "get_task_state" lacks the implementation.
* Moreover, it requires all calls to the "getpid" function to be changed to use "get_task_state", which would cause too much disruption, and it has not been applied in the upstream 8.2.0.
---
linux-user/strace.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/linux-user/strace.c b/linux-user/strace.c
index ac9177ebe4..cf26e55264 100644
--- a/linux-user/strace.c
+++ b/linux-user/strace.c
@@ -4176,7 +4176,7 @@ print_syscall(CPUArchState *cpu_env, int num,
if (!f) {
return;
}
- fprintf(f, "%d ", get_task_state(env_cpu(cpu_env))->ts_tid);
+ fprintf(f, "%d ", getpid());
for (i = 0; i < nsyscalls; i++) {
if (scnames[i].nr == num) {
--
2.41.0.windows.1

View File

@ -0,0 +1,514 @@
From ac715e361fdb6d92169b3b3f5964405c816a13ac Mon Sep 17 00:00:00 2001
From: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Date: Tue, 14 Jan 2025 10:29:24 +0000
Subject: [PATCH] Update iommufd.h header for vSVA
This is based on Linaro UADK branch:
https://github.com/Linaro/linux-kernel-uadk/tree/6.12-wip-10.26
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
linux-headers/linux/iommufd.h | 394 ++++++++++++++++++++++++++++++++--
1 file changed, 371 insertions(+), 23 deletions(-)
diff --git a/linux-headers/linux/iommufd.h b/linux-headers/linux/iommufd.h
index 806d98d09c..41559c6064 100644
--- a/linux-headers/linux/iommufd.h
+++ b/linux-headers/linux/iommufd.h
@@ -37,18 +37,22 @@
enum {
IOMMUFD_CMD_BASE = 0x80,
IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE,
- IOMMUFD_CMD_IOAS_ALLOC,
- IOMMUFD_CMD_IOAS_ALLOW_IOVAS,
- IOMMUFD_CMD_IOAS_COPY,
- IOMMUFD_CMD_IOAS_IOVA_RANGES,
- IOMMUFD_CMD_IOAS_MAP,
- IOMMUFD_CMD_IOAS_UNMAP,
- IOMMUFD_CMD_OPTION,
- IOMMUFD_CMD_VFIO_IOAS,
- IOMMUFD_CMD_HWPT_ALLOC,
- IOMMUFD_CMD_GET_HW_INFO,
- IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING,
- IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP,
+ IOMMUFD_CMD_IOAS_ALLOC = 0x81,
+ IOMMUFD_CMD_IOAS_ALLOW_IOVAS = 0x82,
+ IOMMUFD_CMD_IOAS_COPY = 0x83,
+ IOMMUFD_CMD_IOAS_IOVA_RANGES = 0x84,
+ IOMMUFD_CMD_IOAS_MAP = 0x85,
+ IOMMUFD_CMD_IOAS_UNMAP = 0x86,
+ IOMMUFD_CMD_OPTION = 0x87,
+ IOMMUFD_CMD_VFIO_IOAS = 0x88,
+ IOMMUFD_CMD_HWPT_ALLOC = 0x89,
+ IOMMUFD_CMD_GET_HW_INFO = 0x8a,
+ IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING = 0x8b,
+ IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c,
+ IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
+ IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
+ IOMMUFD_CMD_VIOMMU_ALLOC = 0x8f,
+ IOMMUFD_CMD_VDEVICE_ALLOC = 0x90,
};
/**
@@ -355,10 +359,13 @@ struct iommu_vfio_ioas {
* the parent HWPT in a nesting configuration.
* @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is
* enforced on device attachment
+ * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is
+ * valid.
*/
enum iommufd_hwpt_alloc_flags {
IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
+ IOMMU_HWPT_FAULT_ID_VALID = 1 << 2,
};
/**
@@ -389,14 +396,34 @@ struct iommu_hwpt_vtd_s1 {
__u32 __reserved;
};
+/**
+ * struct iommu_hwpt_arm_smmuv3 - ARM SMMUv3 Context Descriptor Table info
+ * (IOMMU_HWPT_DATA_ARM_SMMUV3)
+ *
+ * @ste: The first two double words of the user space Stream Table Entry for
+ * a user stage-1 Context Descriptor Table. Must be little-endian.
+ * Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec)
+ * - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax
+ * - word-1: EATS, S1DSS, S1CIR, S1COR, S1CSH, S1STALLD
+ *
+ * -EIO will be returned if @ste is not legal or contains any non-allowed field.
+ * Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass
+ * nested domain will translate the same as the nesting parent.
+ */
+struct iommu_hwpt_arm_smmuv3 {
+ __aligned_le64 ste[2];
+};
+
/**
* enum iommu_hwpt_data_type - IOMMU HWPT Data Type
* @IOMMU_HWPT_DATA_NONE: no data
* @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table
+ * @IOMMU_HWPT_DATA_ARM_SMMUV3: ARM SMMUv3 Context Descriptor Table
*/
enum iommu_hwpt_data_type {
- IOMMU_HWPT_DATA_NONE,
- IOMMU_HWPT_DATA_VTD_S1,
+ IOMMU_HWPT_DATA_NONE = 0,
+ IOMMU_HWPT_DATA_VTD_S1 = 1,
+ IOMMU_HWPT_DATA_ARM_SMMUV3 = 2,
};
/**
@@ -404,12 +431,15 @@ enum iommu_hwpt_data_type {
* @size: sizeof(struct iommu_hwpt_alloc)
* @flags: Combination of enum iommufd_hwpt_alloc_flags
* @dev_id: The device to allocate this HWPT for
- * @pt_id: The IOAS or HWPT to connect this HWPT to
+ * @pt_id: The IOAS or HWPT or vIOMMU to connect this HWPT to
* @out_hwpt_id: The ID of the new HWPT
* @__reserved: Must be 0
* @data_type: One of enum iommu_hwpt_data_type
* @data_len: Length of the type specific data
* @data_uptr: User pointer to the type specific data
+ * @fault_id: The ID of IOMMUFD_FAULT object. Valid only if flags field of
+ * IOMMU_HWPT_FAULT_ID_VALID is set.
+ * @__reserved2: Padding to 64-bit alignment. Must be 0.
*
* Explicitly allocate a hardware page table object. This is the same object
* type that is returned by iommufd_device_attach() and represents the
@@ -420,11 +450,13 @@ enum iommu_hwpt_data_type {
* IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a
* nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags.
*
- * A user-managed nested HWPT will be created from a given parent HWPT via
- * @pt_id, in which the parent HWPT must be allocated previously via the
- * same ioctl from a given IOAS (@pt_id). In this case, the @data_type
- * must be set to a pre-defined type corresponding to an I/O page table
- * type supported by the underlying IOMMU hardware.
+ * A user-managed nested HWPT will be created from a given vIOMMU (wrapping a
+ * parent HWPT) or a parent HWPT via @pt_id, in which the parent HWPT must be
+ * allocated previously via the same ioctl from a given IOAS (@pt_id). In this
+ * case, the @data_type must be set to a pre-defined type corresponding to an
+ * I/O page table type supported by the underlying IOMMU hardware. The device
+ * via @dev_id and the vIOMMU via @pt_id must be associated to the same IOMMU
+ * instance.
*
* If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and
* @data_uptr should be zero. Otherwise, both @data_len and @data_uptr
@@ -440,6 +472,8 @@ struct iommu_hwpt_alloc {
__u32 data_type;
__u32 data_len;
__aligned_u64 data_uptr;
+ __u32 fault_id;
+ __u32 __reserved2;
};
#define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)
@@ -474,15 +508,50 @@ struct iommu_hw_info_vtd {
__aligned_u64 ecap_reg;
};
+/**
+ * struct iommu_hw_info_arm_smmuv3 - ARM SMMUv3 hardware information
+ * (IOMMU_HW_INFO_TYPE_ARM_SMMUV3)
+ *
+ * @flags: Must be set to 0
+ * @__reserved: Must be 0
+ * @idr: Implemented features for ARM SMMU Non-secure programming interface
+ * @iidr: Information about the implementation and implementer of ARM SMMU,
+ * and architecture version supported
+ * @aidr: ARM SMMU architecture version
+ *
+ * For the details of @idr, @iidr and @aidr, please refer to the chapters
+ * from 6.3.1 to 6.3.6 in the SMMUv3 Spec.
+ *
+ * User space should read the underlying ARM SMMUv3 hardware information for
+ * the list of supported features.
+ *
+ * Note that these values reflect the raw HW capability, without any insight if
+ * any required kernel driver support is present. Bits may be set indicating the
+ * HW has functionality that is lacking kernel software support, such as BTM. If
+ * a VMM is using this information to construct emulated copies of these
+ * registers it should only forward bits that it knows it can support.
+ *
+ * In future, presence of required kernel support will be indicated in flags.
+ */
+struct iommu_hw_info_arm_smmuv3 {
+ __u32 flags;
+ __u32 __reserved;
+ __u32 idr[6];
+ __u32 iidr;
+ __u32 aidr;
+};
+
/**
* enum iommu_hw_info_type - IOMMU Hardware Info Types
* @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware
* info
* @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
+ * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type
*/
enum iommu_hw_info_type {
- IOMMU_HW_INFO_TYPE_NONE,
- IOMMU_HW_INFO_TYPE_INTEL_VTD,
+ IOMMU_HW_INFO_TYPE_NONE = 0,
+ IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,
+ IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2,
};
/**
@@ -494,9 +563,17 @@ enum iommu_hw_info_type {
* IOMMU_HWPT_GET_DIRTY_BITMAP
* IOMMU_HWPT_SET_DIRTY_TRACKING
*
+ * @IOMMU_HW_CAP_PCI_PASID_EXEC: Execute Permission Supported, user ignores it
+ * when the struct iommu_hw_info::out_max_pasid_log2
+ * is zero.
+ * @IOMMU_HW_CAP_PCI_PASID_PRIV: Privileged Mode Supported, user ignores it
+ * when the struct iommu_hw_info::out_max_pasid_log2
+ * is zero.
*/
enum iommufd_hw_capabilities {
IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0,
+ IOMMU_HW_CAP_PCI_PASID_EXEC = 1 << 1,
+ IOMMU_HW_CAP_PCI_PASID_PRIV = 1 << 2,
};
/**
@@ -512,6 +589,9 @@ enum iommufd_hw_capabilities {
* iommu_hw_info_type.
* @out_capabilities: Output the generic iommu capability info type as defined
* in the enum iommu_hw_capabilities.
+ * @out_max_pasid_log2: Output the width of PASIDs. 0 means no PASID support.
+ * PCI devices turn to out_capabilities to check if the
+ * specific capabilities is supported or not.
* @__reserved: Must be 0
*
* Query an iommu type specific hardware information data from an iommu behind
@@ -535,7 +615,8 @@ struct iommu_hw_info {
__u32 data_len;
__aligned_u64 data_uptr;
__u32 out_data_type;
- __u32 __reserved;
+ __u8 out_max_pasid_log2;
+ __u8 __reserved[3];
__aligned_u64 out_capabilities;
};
#define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO)
@@ -613,4 +694,271 @@ struct iommu_hwpt_get_dirty_bitmap {
#define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \
IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP)
+/**
+ * enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation
+ * Data Type
+ * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1
+ * @IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3: Invalidation data for ARM SMMUv3
+ */
+enum iommu_hwpt_invalidate_data_type {
+ IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0,
+ IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3 = 1,
+};
+
+/**
+ * enum iommu_hwpt_vtd_s1_invalidate_flags - Flags for Intel VT-d
+ * stage-1 cache invalidation
+ * @IOMMU_VTD_INV_FLAGS_LEAF: Indicates whether the invalidation applies
+ * to all-levels page structure cache or just
+ * the leaf PTE cache.
+ */
+enum iommu_hwpt_vtd_s1_invalidate_flags {
+ IOMMU_VTD_INV_FLAGS_LEAF = 1 << 0,
+};
+
+/**
+ * struct iommu_hwpt_vtd_s1_invalidate - Intel VT-d cache invalidation
+ * (IOMMU_HWPT_INVALIDATE_DATA_VTD_S1)
+ * @addr: The start address of the range to be invalidated. It needs to
+ * be 4KB aligned.
+ * @npages: Number of contiguous 4K pages to be invalidated.
+ * @flags: Combination of enum iommu_hwpt_vtd_s1_invalidate_flags
+ * @__reserved: Must be 0
+ *
+ * The Intel VT-d specific invalidation data for user-managed stage-1 cache
+ * invalidation in nested translation. Userspace uses this structure to
+ * tell the impacted cache scope after modifying the stage-1 page table.
+ *
+ * Invalidating all the caches related to the page table by setting @addr
+ * to be 0 and @npages to be U64_MAX.
+ *
+ * The device TLB will be invalidated automatically if ATS is enabled.
+ */
+struct iommu_hwpt_vtd_s1_invalidate {
+ __aligned_u64 addr;
+ __aligned_u64 npages;
+ __u32 flags;
+ __u32 __reserved;
+};
+
+/**
+ * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cache invalidation
+ * (IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3)
+ * @cmd: 128-bit cache invalidation command that runs in SMMU CMDQ.
+ * Must be little-endian.
+ *
+ * Supported command list only when passing in a vIOMMU via @hwpt_id:
+ * CMDQ_OP_TLBI_NSNH_ALL
+ * CMDQ_OP_TLBI_NH_VA
+ * CMDQ_OP_TLBI_NH_VAA
+ * CMDQ_OP_TLBI_NH_ALL
+ * CMDQ_OP_TLBI_NH_ASID
+ * CMDQ_OP_ATC_INV
+ * CMDQ_OP_CFGI_CD
+ * CMDQ_OP_CFGI_CD_ALL
+ *
+ * -EIO will be returned if the command is not supported.
+ */
+struct iommu_viommu_arm_smmuv3_invalidate {
+ __aligned_le64 cmd[2];
+};
+
+/**
+ * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
+ * @size: sizeof(struct iommu_hwpt_invalidate)
+ * @hwpt_id: ID of a nested HWPT or a vIOMMU, for cache invalidation
+ * @data_uptr: User pointer to an array of driver-specific cache invalidation
+ * data.
+ * @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data
+ * type of all the entries in the invalidation request array. It
+ * should be a type supported by the hwpt pointed by @hwpt_id.
+ * @entry_len: Length (in bytes) of a request entry in the request array
+ * @entry_num: Input the number of cache invalidation requests in the array.
+ * Output the number of requests successfully handled by kernel.
+ * @__reserved: Must be 0.
+ *
+ * Invalidate iommu cache for user-managed page table or vIOMMU. Modifications
+ * on a user-managed page table should be followed by this operation, if a HWPT
+ * is passed in via @hwpt_id. Other caches, such as device cache or descriptor
+ * cache can be flushed if a vIOMMU is passed in via the @hwpt_id field.
+ *
+ * Each ioctl can support one or more cache invalidation requests in the array
+ * that has a total size of @entry_len * @entry_num.
+ *
+ * An empty invalidation request array by setting @entry_num==0 is allowed, and
+ * @entry_len and @data_uptr would be ignored in this case. This can be used to
+ * check if the given @data_type is supported or not by kernel.
+ */
+struct iommu_hwpt_invalidate {
+ __u32 size;
+ __u32 hwpt_id;
+ __aligned_u64 data_uptr;
+ __u32 data_type;
+ __u32 entry_len;
+ __u32 entry_num;
+ __u32 __reserved;
+};
+#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE)
+
+/**
+ * enum iommu_hwpt_pgfault_flags - flags for struct iommu_hwpt_pgfault
+ * @IOMMU_PGFAULT_FLAGS_PASID_VALID: The pasid field of the fault data is
+ * valid.
+ * @IOMMU_PGFAULT_FLAGS_LAST_PAGE: It's the last fault of a fault group.
+ */
+enum iommu_hwpt_pgfault_flags {
+ IOMMU_PGFAULT_FLAGS_PASID_VALID = (1 << 0),
+ IOMMU_PGFAULT_FLAGS_LAST_PAGE = (1 << 1),
+};
+
+/**
+ * enum iommu_hwpt_pgfault_perm - perm bits for struct iommu_hwpt_pgfault
+ * @IOMMU_PGFAULT_PERM_READ: request for read permission
+ * @IOMMU_PGFAULT_PERM_WRITE: request for write permission
+ * @IOMMU_PGFAULT_PERM_EXEC: (PCIE 10.4.1) request with a PASID that has the
+ * Execute Requested bit set in PASID TLP Prefix.
+ * @IOMMU_PGFAULT_PERM_PRIV: (PCIE 10.4.1) request with a PASID that has the
+ * Privileged Mode Requested bit set in PASID TLP
+ * Prefix.
+ */
+enum iommu_hwpt_pgfault_perm {
+ IOMMU_PGFAULT_PERM_READ = (1 << 0),
+ IOMMU_PGFAULT_PERM_WRITE = (1 << 1),
+ IOMMU_PGFAULT_PERM_EXEC = (1 << 2),
+ IOMMU_PGFAULT_PERM_PRIV = (1 << 3),
+};
+
+/**
+ * struct iommu_hwpt_pgfault - iommu page fault data
+ * @flags: Combination of enum iommu_hwpt_pgfault_flags
+ * @dev_id: id of the originated device
+ * @pasid: Process Address Space ID
+ * @grpid: Page Request Group Index
+ * @perm: Combination of enum iommu_hwpt_pgfault_perm
+ * @addr: Fault address
+ * @length: a hint of how much data the requestor is expecting to fetch. For
+ * example, if the PRI initiator knows it is going to do a 10MB
+ * transfer, it could fill in 10MB and the OS could pre-fault in
+ * 10MB of IOVA. It's default to 0 if there's no such hint.
+ * @cookie: kernel-managed cookie identifying a group of fault messages. The
+ * cookie number encoded in the last page fault of the group should
+ * be echoed back in the response message.
+ */
+struct iommu_hwpt_pgfault {
+ __u32 flags;
+ __u32 dev_id;
+ __u32 pasid;
+ __u32 grpid;
+ __u32 perm;
+ __u64 addr;
+ __u32 length;
+ __u32 cookie;
+};
+
+/**
+ * enum iommufd_page_response_code - Return status of fault handlers
+ * @IOMMUFD_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
+ * populated, retry the access. This is the
+ * "Success" defined in PCI 10.4.2.1.
+ * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
+ * access. This is the "Invalid Request" in PCI
+ * 10.4.2.1.
+ */
+enum iommufd_page_response_code {
+ IOMMUFD_PAGE_RESP_SUCCESS = 0,
+ IOMMUFD_PAGE_RESP_INVALID = 1,
+};
+
+/**
+ * struct iommu_hwpt_page_response - IOMMU page fault response
+ * @cookie: The kernel-managed cookie reported in the fault message.
+ * @code: One of response code in enum iommufd_page_response_code.
+ */
+struct iommu_hwpt_page_response {
+ __u32 cookie;
+ __u32 code;
+};
+
+/**
+ * struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC)
+ * @size: sizeof(struct iommu_fault_alloc)
+ * @flags: Must be 0
+ * @out_fault_id: The ID of the new FAULT
+ * @out_fault_fd: The fd of the new FAULT
+ *
+ * Explicitly allocate a fault handling object.
+ */
+struct iommu_fault_alloc {
+ __u32 size;
+ __u32 flags;
+ __u32 out_fault_id;
+ __u32 out_fault_fd;
+};
+#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)
+
+/**
+ * enum iommu_viommu_type - Virtual IOMMU Type
+ * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use
+ * @IOMMU_VIOMMU_TYPE_ARM_SMMUV3: ARM SMMUv3 driver specific type
+ */
+enum iommu_viommu_type {
+ IOMMU_VIOMMU_TYPE_DEFAULT = 0,
+ IOMMU_VIOMMU_TYPE_ARM_SMMUV3 = 1,
+};
+
+/**
+ * struct iommu_viommu_alloc - ioctl(IOMMU_VIOMMU_ALLOC)
+ * @size: sizeof(struct iommu_viommu_alloc)
+ * @flags: Must be 0
+ * @type: Type of the virtual IOMMU. Must be defined in enum iommu_viommu_type
+ * @dev_id: The device's physical IOMMU will be used to back the virtual IOMMU
+ * @hwpt_id: ID of a nesting parent HWPT to associate to
+ * @out_viommu_id: Output virtual IOMMU ID for the allocated object
+ *
+ * Allocate a virtual IOMMU object, representing the underlying physical IOMMU's
+ * virtualization support that is a security-isolated slice of the real IOMMU HW
+ * that is unique to a specific VM. Operations global to the IOMMU are connected
+ * to the vIOMMU, such as:
+ * - Security namespace for guest owned ID, e.g. guest-controlled cache tags
+ * - Access to a sharable nesting parent pagetable across physical IOMMUs
+ * - Non-affiliated event reporting (e.g. an invalidation queue error)
+ * - Virtualization of various platforms IDs, e.g. RIDs and others
+ * - Delivery of paravirtualized invalidation
+ * - Direct assigned invalidation queues
+ * - Direct assigned interrupts
+ */
+struct iommu_viommu_alloc {
+ __u32 size;
+ __u32 flags;
+ __u32 type;
+ __u32 dev_id;
+ __u32 hwpt_id;
+ __u32 out_viommu_id;
+};
+#define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC)
+
+/**
+ * struct iommu_vdevice_alloc - ioctl(IOMMU_VDEVICE_ALLOC)
+ * @size: sizeof(struct iommu_vdevice_alloc)
+ * @viommu_id: vIOMMU ID to associate with the virtual device
+ * @dev_id: The physical device to allocate a virtual instance on the vIOMMU
+ * @__reserved: Must be 0
+ * @virt_id: Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID
+ * of AMD IOMMU, and vID of a nested Intel VT-d to a Context Table.
+ * @out_vdevice_id: Output virtual instance ID for the allocated object
+ * @__reserved2: Must be 0
+ *
+ * Allocate a virtual device instance (for a physical device) against a vIOMMU.
+ * This instance holds the device's information (related to its vIOMMU) in a VM.
+ */
+struct iommu_vdevice_alloc {
+ __u32 size;
+ __u32 viommu_id;
+ __u32 dev_id;
+ __u32 __reserved;
+ __aligned_u64 virt_id;
+ __u32 out_vdevice_id;
+ __u32 __reserved2;
+};
+#define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC)
#endif
--
2.41.0.windows.1

View File

@ -0,0 +1,32 @@
From 237fdc8ddb0598234aace9c88ac4c8387119a12a Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 7 Jul 2022 11:55:25 -0400
Subject: [PATCH] acpi/gpex: Fix PCI Express Slot Information function 0
returned value
At the moment we do not support other function than function 0.
So according to ACPI spec "_DSM (Device Specific Method)"
description, bit 0 should rather be 0, meaning no other function is
supported than function 0.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
hw/pci-host/gpex-acpi.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c
index 1092dc3b70..ac5d229757 100644
--- a/hw/pci-host/gpex-acpi.c
+++ b/hw/pci-host/gpex-acpi.c
@@ -113,7 +113,7 @@ static void acpi_dsdt_add_pci_osc(Aml *dev)
UUID = aml_touuid("E5C937D0-3553-4D7A-9117-EA4D19C3434D");
ifctx = aml_if(aml_equal(aml_arg(0), UUID));
ifctx1 = aml_if(aml_equal(aml_arg(2), aml_int(0)));
- uint8_t byte_list[1] = {1};
+ uint8_t byte_list[1] = {0};
buf = aml_buffer(1, byte_list);
aml_append(ifctx1, aml_return(buf));
aml_append(ifctx, ifctx1);
--
2.41.0.windows.1

View File

@ -0,0 +1,189 @@
From 9eacd1a6df6861b76663e98133adb15059bf65cc Mon Sep 17 00:00:00 2001
From: gongchangsui <gongchangsui@outlook.com>
Date: Mon, 17 Mar 2025 02:40:50 -0400
Subject: [PATCH] arm: VirtCCA: CVM support UEFI boot
1. Add UEFI boot support for Confidential VMs.
2. Modify the base memory address of Confidential VMs from 3GB to 1GB.
3. Disable pflash boot support for Confidential VMs; use the `-bios` option to specify `QEMU_EFI.fd` during launch.
Signed-off-by: gongchangsui <gongchangsui@outlook.com>
---
hw/arm/boot.c | 38 ++++++++++++++++++++++++++++++++++++--
hw/arm/virt.c | 33 ++++++++++++++++++++++++++++++++-
include/hw/arm/boot.h | 3 +++
3 files changed, 71 insertions(+), 3 deletions(-)
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 42110b0f18..6b2f46af4d 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -43,6 +43,9 @@
#define BOOTLOADER_MAX_SIZE (4 * KiB)
+#define UEFI_MAX_SIZE 0x8000000
+#define UEFI_LOADER_START 0x0
+#define DTB_MAX 0x200000
AddressSpace *arm_boot_address_space(ARMCPU *cpu,
const struct arm_boot_info *info)
{
@@ -1155,7 +1158,31 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
}
}
-static void arm_setup_firmware_boot(ARMCPU *cpu, struct arm_boot_info *info)
+static void arm_setup_confidential_firmware_boot(ARMCPU *cpu,
+ struct arm_boot_info *info,
+ const char *firmware_filename)
+{
+ ssize_t fw_size;
+ g_autofree char *fname = NULL; /* owned here: qemu_find_file() allocates */
+ AddressSpace *as = arm_boot_address_space(cpu, info);
+ /* Load the -bios image at info->firmware_base; exits QEMU on any failure. */
+ fname = qemu_find_file(QEMU_FILE_TYPE_BIOS, firmware_filename);
+ if (!fname) {
+ error_report("Could not find firmware image '%s'", firmware_filename);
+ exit(EXIT_FAILURE);
+ }
+ /* Use the resolved path, not the raw name, so data-dir lookups work. */
+ fw_size = load_image_targphys_as(fname,
+ info->firmware_base,
+ info->firmware_max_size, as);
+
+ if (fw_size <= 0) {
+ error_report("could not load firmware '%s'", firmware_filename);
+ exit(EXIT_FAILURE);
+ }
+}
+
+static void arm_setup_firmware_boot(ARMCPU *cpu, struct arm_boot_info *info, const char *firmware_filename)
{
/* Set up for booting firmware (which might load a kernel via fw_cfg) */
@@ -1166,6 +1193,8 @@ static void arm_setup_firmware_boot(ARMCPU *cpu, struct arm_boot_info *info)
* DTB to the base of RAM for the bootloader to pick up.
*/
info->dtb_start = info->loader_start;
+ if (info->confidential)
+ tmm_add_ram_region(UEFI_LOADER_START, UEFI_MAX_SIZE, info->dtb_start, DTB_MAX , true);
}
if (info->kernel_filename) {
@@ -1206,6 +1235,11 @@ static void arm_setup_firmware_boot(ARMCPU *cpu, struct arm_boot_info *info)
}
}
+ if (info->confidential) {
+ arm_setup_confidential_firmware_boot(cpu, info, firmware_filename);
+ kvm_load_user_data(UEFI_LOADER_START, UEFI_MAX_SIZE, info->loader_start, info->loader_start + DTB_MAX, info->ram_size,
+ (struct kvm_numa_info *)info->numa_info);
+ }
/*
* We will start from address 0 (typically a boot ROM image) in the
* same way as hardware. Leave env->boot_info NULL, so that
@@ -1282,7 +1316,7 @@ void arm_load_kernel(ARMCPU *cpu, MachineState *ms, struct arm_boot_info *info)
/* Load the kernel. */
if (!info->kernel_filename || info->firmware_loaded) {
- arm_setup_firmware_boot(cpu, info);
+ arm_setup_firmware_boot(cpu, info, ms->firmware);
} else {
arm_setup_direct_kernel_boot(cpu, info);
}
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 8823f2ed1c..6ffb26e7e6 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1398,6 +1398,9 @@ static void virt_flash_map1(PFlashCFI01 *flash,
qdev_prop_set_uint32(dev, "num-blocks", size / VIRT_FLASH_SECTOR_SIZE);
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+ if (virtcca_cvm_enabled()) {
+ return;
+ }
memory_region_add_subregion(sysmem, base,
sysbus_mmio_get_region(SYS_BUS_DEVICE(dev),
0));
@@ -1433,6 +1436,10 @@ static void virt_flash_fdt(VirtMachineState *vms,
MachineState *ms = MACHINE(vms);
char *nodename;
+ if (virtcca_cvm_enabled()) {
+ return;
+ }
+
if (sysmem == secure_sysmem) {
/* Report both flash devices as a single node in the DT */
nodename = g_strdup_printf("/flash@%" PRIx64, flashbase);
@@ -1468,6 +1475,23 @@ static void virt_flash_fdt(VirtMachineState *vms,
}
}
+static bool virt_confidential_firmware_init(VirtMachineState *vms,
+ MemoryRegion *sysmem)
+{
+ MemoryRegion *fw_ram;
+ hwaddr fw_base = vms->memmap[VIRT_FLASH].base;
+ hwaddr fw_size = vms->memmap[VIRT_FLASH].size;
+
+ if (!MACHINE(vms)->firmware) {
+ return false; /* no -bios given: report that no firmware was set up */
+ }
+ /* Map RAM over the VIRT_FLASH window; abort if it cannot be allocated. */
+ fw_ram = g_new(MemoryRegion, 1);
+ memory_region_init_ram(fw_ram, NULL, "fw_ram", fw_size, &error_fatal);
+ memory_region_add_subregion(sysmem, fw_base, fw_ram);
+ return true;
+}
+
static bool virt_firmware_init(VirtMachineState *vms,
MemoryRegion *sysmem,
MemoryRegion *secure_sysmem)
@@ -1486,6 +1510,10 @@ static bool virt_firmware_init(VirtMachineState *vms,
pflash_blk0 = pflash_cfi01_get_blk(vms->flash[0]);
+ if (virtcca_cvm_enabled()) {
+ return virt_confidential_firmware_init(vms, sysmem);
+ }
+
bios_name = MACHINE(vms)->firmware;
if (bios_name) {
char *fname;
@@ -2023,7 +2051,7 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits)
vms->memmap[VIRT_PCIE_MMIO] = (MemMapEntry) { 0x10000000, 0x2edf0000 };
vms->memmap[VIRT_KAE_DEVICE] = (MemMapEntry) { 0x3edf0000, 0x00200000 };
- vms->memmap[VIRT_MEM].base = 3 * GiB;
+ vms->memmap[VIRT_MEM].base = 1 * GiB;
vms->memmap[VIRT_MEM].size = ms->ram_size;
info_report("[qemu] fix VIRT_MEM range 0x%llx - 0x%llx\n", (unsigned long long)(vms->memmap[VIRT_MEM].base),
(unsigned long long)(vms->memmap[VIRT_MEM].base + ms->ram_size));
@@ -2822,6 +2850,9 @@ static void machvirt_init(MachineState *machine)
vms->bootinfo.get_dtb = machvirt_dtb;
vms->bootinfo.skip_dtb_autoload = true;
vms->bootinfo.firmware_loaded = firmware_loaded;
+ vms->bootinfo.firmware_base = vms->memmap[VIRT_FLASH].base;
+ vms->bootinfo.firmware_max_size = vms->memmap[VIRT_FLASH].size;
+ vms->bootinfo.confidential = virtcca_cvm_enabled();
vms->bootinfo.psci_conduit = vms->psci_conduit;
arm_load_kernel(ARM_CPU(first_cpu), machine, &vms->bootinfo);
diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h
index 4491b1f85b..06ca1d90b2 100644
--- a/include/hw/arm/boot.h
+++ b/include/hw/arm/boot.h
@@ -133,6 +133,9 @@ struct arm_boot_info {
bool secure_board_setup;
arm_endianness endianness;
+ hwaddr firmware_base;
+ hwaddr firmware_max_size;
+ bool confidential;
};
/**
--
2.41.0.windows.1

View File

@ -0,0 +1,117 @@
From 5ed17a43a4cc7fc76397d6d8cad8246063b5b2f3 Mon Sep 17 00:00:00 2001
From: gongchangsui <gongchangsui@outlook.com>
Date: Mon, 17 Mar 2025 02:43:55 -0400
Subject: [PATCH] arm: VirtCCA: Compatibility with older versions of TMM and
the kernel
Since the base memory address of Confidential VMs in QEMU was changed
from 3GB to 1GB, corresponding adjustments are required in both the TMM
and kernel components. To maintain backward compatibility, the following
modifications were implemented:
1. **TMM Versioning**: The TMM version number was incremented to
reflect the update
2. **Kernel Interface**: A new interface was exposed in the kernel
to retrieve the TMM version number.
3. **QEMU Compatibility Logic**: During initialization, QEMU checks
the TMM version via the kernel interface. If the TMM version is **<2.1** (legacy),
QEMU sets the Confidential VM's base memory address to **3GB**. For TMM versions
**>=2.1** (updated), the address is configured to **1GB** to align with the new memory layout.
This approach ensures seamless backward compatibility while transitioning
to the revised memory addressing scheme.
Signed-off-by: gongchangsui <gongchangsui@outlook.com>
---
accel/kvm/kvm-all.c | 3 +--
hw/arm/boot.c | 9 +++++++++
hw/arm/virt.c | 9 +++++++--
linux-headers/asm-arm64/kvm.h | 2 ++
linux-headers/linux/kvm.h | 3 +++
5 files changed, 22 insertions(+), 4 deletions(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index a8e29f148e..38a48cc031 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2390,6 +2390,7 @@ static int kvm_init(MachineState *ms)
qemu_mutex_init(&kml_slots_lock);
s = KVM_STATE(ms->accelerator);
+ kvm_state = s;
/*
* On systems where the kernel can support different base page
@@ -2609,8 +2610,6 @@ static int kvm_init(MachineState *ms)
#endif
}
- kvm_state = s;
-
ret = kvm_arch_init(ms, s);
if (ret < 0) {
goto err;
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 6b2f46af4d..ca9f69fd3d 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -1162,6 +1162,15 @@ static void arm_setup_confidential_firmware_boot(ARMCPU *cpu,
struct arm_boot_info *info,
const char *firmware_filename)
{
+ uint64_t tmi_version = 0;
+ if (kvm_ioctl(kvm_state, KVM_GET_TMI_VERSION, &tmi_version) < 0) {
+ error_report("please check the kernel version!");
+ exit(EXIT_FAILURE);
+ }
+ if (tmi_version < MIN_TMI_VERSION_FOR_UEFI_BOOTED_CVM) {
+ error_report("please check the tmi version!");
+ exit(EXIT_FAILURE);
+ }
ssize_t fw_size;
const char *fname;
AddressSpace *as = arm_boot_address_space(cpu, info);
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 6ffb26e7e6..39dfec0877 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2050,8 +2050,13 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits)
/* support kae vf device tree nodes */
vms->memmap[VIRT_PCIE_MMIO] = (MemMapEntry) { 0x10000000, 0x2edf0000 };
vms->memmap[VIRT_KAE_DEVICE] = (MemMapEntry) { 0x3edf0000, 0x00200000 };
-
- vms->memmap[VIRT_MEM].base = 1 * GiB;
+ uint64_t tmi_version = 0;
+ if (kvm_ioctl(kvm_state, KVM_GET_TMI_VERSION, &tmi_version) < 0) {
+ warn_report("can not get tmi version");
+ }
+ if (tmi_version < MIN_TMI_VERSION_FOR_UEFI_BOOTED_CVM) {
+ vms->memmap[VIRT_MEM].base = 3 * GiB;
+ }
vms->memmap[VIRT_MEM].size = ms->ram_size;
info_report("[qemu] fix VIRT_MEM range 0x%llx - 0x%llx\n", (unsigned long long)(vms->memmap[VIRT_MEM].base),
(unsigned long long)(vms->memmap[VIRT_MEM].base + ms->ram_size));
diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
index 552fdcb18f..d69a71cbec 100644
--- a/linux-headers/asm-arm64/kvm.h
+++ b/linux-headers/asm-arm64/kvm.h
@@ -597,4 +597,6 @@ struct kvm_cap_arm_tmm_populate_region_args {
#endif
+#define MIN_TMI_VERSION_FOR_UEFI_BOOTED_CVM 0x20001
+
#endif /* __ARM_KVM_H__ */
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 84cec64b88..7a08f9b1e9 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -2422,4 +2422,7 @@ struct kvm_s390_zpci_op {
/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0)
+/* get tmi version */
+#define KVM_GET_TMI_VERSION _IOR(KVMIO, 0xd2, uint64_t)
+
#endif /* __LINUX_KVM_H */
--
2.41.0.windows.1

View File

@ -0,0 +1,137 @@
From 0119389040e4d78c6238875b812827d4f07b5f0f Mon Sep 17 00:00:00 2001
From: gongchangsui <gongchangsui@outlook.com>
Date: Mon, 17 Mar 2025 02:51:16 -0400
Subject: [PATCH] arm: VirtCCA: qemu CoDA support UEFI boot
1. Expose PCIe MMIO region from QEMU memory map.
2. Refactor struct kvm_user_data: in UEFI boot mode, data_start and
data_size represent the address base and size of the MMIO; in direct
boot mode, they represent the address base and size of the DTB.
Signed-off-by: gongchangsui <gongchangsui@outlook.com>
---
accel/kvm/kvm-all.c | 8 ++++----
hw/arm/boot.c | 10 ++++++----
hw/arm/virt.c | 6 ++++++
linux-headers/linux/kvm.h | 12 +++++++++---
target/arm/kvm_arm.h | 2 ++
5 files changed, 27 insertions(+), 11 deletions(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 38a48cc031..57c6718b77 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -3527,7 +3527,7 @@ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target)
return r;
}
-int kvm_load_user_data(hwaddr loader_start, hwaddr image_end, hwaddr initrd_start, hwaddr dtb_end, hwaddr ram_size,
+int kvm_load_user_data(hwaddr loader_start, hwaddr dtb_info, hwaddr data_start, hwaddr data_size, hwaddr ram_size,
struct kvm_numa_info *numa_info)
{
KVMState *state = kvm_state;
@@ -3535,9 +3535,9 @@ int kvm_load_user_data(hwaddr loader_start, hwaddr image_end, hwaddr initrd_star
int ret;
data.loader_start = loader_start;
- data.image_end = image_end;
- data.initrd_start = initrd_start;
- data.dtb_end = dtb_end;
+ data.dtb_info = dtb_info;
+ data.data_start = data_start;
+ data.data_size = data_size;
data.ram_size = ram_size;
memcpy(&data.numa_info, numa_info, sizeof(struct kvm_numa_info));
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index ca9f69fd3d..a3e0dbb68c 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -1149,10 +1149,10 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
if (kvm_enabled() && virtcca_cvm_enabled()) {
if (info->dtb_limit == 0) {
- info->dtb_limit = info->dtb_start + 0x200000;
+ info->dtb_limit = info->dtb_start + DTB_MAX;
}
- kvm_load_user_data(info->loader_start, image_high_addr, info->initrd_start,
- info->dtb_limit, info->ram_size, (struct kvm_numa_info *)info->numa_info);
+ kvm_load_user_data(info->loader_start, 0x1, info->dtb_start,
+ info->dtb_limit - info->dtb_start, info->ram_size, (struct kvm_numa_info *)info->numa_info);
tmm_add_ram_region(info->loader_start, image_high_addr - info->loader_start,
info->initrd_start, info->dtb_limit - info->initrd_start, true);
}
@@ -1193,6 +1193,7 @@ static void arm_setup_confidential_firmware_boot(ARMCPU *cpu,
static void arm_setup_firmware_boot(ARMCPU *cpu, struct arm_boot_info *info, const char *firmware_filename)
{
+ hwaddr mmio_start, mmio_size;
/* Set up for booting firmware (which might load a kernel via fw_cfg) */
if (have_dtb(info)) {
@@ -1246,7 +1247,8 @@ static void arm_setup_firmware_boot(ARMCPU *cpu, struct arm_boot_info *info, con
if (info->confidential) {
arm_setup_confidential_firmware_boot(cpu, info, firmware_filename);
- kvm_load_user_data(UEFI_LOADER_START, UEFI_MAX_SIZE, info->loader_start, info->loader_start + DTB_MAX, info->ram_size,
+ virtcca_kvm_get_mmio_addr(&mmio_start, &mmio_size);
+ kvm_load_user_data(info->loader_start, DTB_MAX, mmio_start, mmio_size, info->ram_size,
(struct kvm_numa_info *)info->numa_info);
}
/*
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 39dfec0877..6c5611826c 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -176,6 +176,12 @@ static const MemMapEntry base_memmap[] = {
[VIRT_MEM] = { GiB, LEGACY_RAMLIMIT_BYTES },
};
+void virtcca_kvm_get_mmio_addr(hwaddr *mmio_start, hwaddr *mmio_size)
+{
+ *mmio_start = base_memmap[VIRT_PCIE_MMIO].base;
+ *mmio_size = base_memmap[VIRT_PCIE_MMIO].size;
+}
+
/*
* Highmem IO Regions: This memory map is floating, located after the RAM.
* Each MemMapEntry base (GPA) will be dynamically computed, depending on the
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 7a08f9b1e9..c9ec7f862a 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -1510,9 +1510,15 @@ struct kvm_numa_info {
struct kvm_user_data {
__u64 loader_start;
- __u64 image_end;
- __u64 initrd_start;
- __u64 dtb_end;
+ /*
+ * When the lowest bit of dtb_info is 0, the value of dtb_info represents the size of the DTB,
+ * and data_start and data_size represent the address base and size of the MMIO.
+ * When the lowest bit of dtb_info is 1, data_start and data_size represent the address base
+ * and size of the DTB.
+ */
+ __u64 dtb_info;
+ __u64 data_start;
+ __u64 data_size;
__u64 ram_size;
struct kvm_numa_info numa_info;
};
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index 31457a57f7..62fbb713f4 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -73,6 +73,8 @@ int kvm_arm_vcpu_finalize(CPUState *cs, int feature);
void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group,
uint64_t attr, int dev_fd, uint64_t addr_ormask);
+void virtcca_kvm_get_mmio_addr(hwaddr *mmio_start, hwaddr *mmio_size);
+
/**
* kvm_arm_init_cpreg_list:
* @cpu: ARMCPU
--
2.41.0.windows.1

View File

@ -0,0 +1,100 @@
From 5bffeb311c969a0e05106e4bf54282431c5ba907 Mon Sep 17 00:00:00 2001
From: gongchangsui <gongchangsui@outlook.com>
Date: Mon, 17 Mar 2025 02:42:43 -0400
Subject: [PATCH] arm: VirtCCA: qemu uefi boot support kae
This commit introduces modifications to enable KAE functionality
during UEFI boot in cVMs. Additionally, the ACPI feature must be
configured in cVM.
Signed-off-by: gongchangsui <gongchangsui@outlook.com>
---
hw/arm/virt-acpi-build.c | 58 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 58 insertions(+)
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 076781423b..f78331d69f 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -58,6 +58,7 @@
#include "migration/vmstate.h"
#include "hw/acpi/ghes.h"
#include "hw/acpi/viot.h"
+#include "kvm_arm.h"
#define ARM_SPI_BASE 32
@@ -405,6 +406,54 @@ static void acpi_dsdt_add_virtio(Aml *scope,
}
}
+static void acpi_dsdt_add_hisi_sec(Aml *scope,
+ const MemMapEntry *virtio_mmio_memmap,
+ int dev_id)
+{
+ hwaddr size = 0x10000;
+
+ /*
+ * Calculate the base address for the sec device node.
+ * Each device group contains one sec device and one hpre device, spaced by 2 * size.
+ */
+ hwaddr base = virtio_mmio_memmap->base + dev_id * 2 * size;
+
+ Aml *dev = aml_device("SE%02u", dev_id);
+ aml_append(dev, aml_name_decl("_HID", aml_string("SEC07")));
+ aml_append(dev, aml_name_decl("_UID", aml_int(dev_id)));
+ aml_append(dev, aml_name_decl("_CCA", aml_int(1)));
+
+ Aml *crs = aml_resource_template();
+
+ aml_append(crs, aml_memory32_fixed(base, size, AML_READ_WRITE));
+ aml_append(dev, aml_name_decl("_CRS", crs));
+ aml_append(scope, dev);
+}
+
+static void acpi_dsdt_add_hisi_hpre(Aml *scope,
+ const MemMapEntry *virtio_mmio_memmap,
+ int dev_id)
+{
+ hwaddr size = 0x10000;
+
+ /*
+ * Calculate the base address for the hpre device node.
+ * Each hpre device follows the corresponding sec device by an additional offset of size.
+ */
+ hwaddr base = virtio_mmio_memmap->base + dev_id * 2 * size + size;
+
+ Aml *dev = aml_device("HP%02u", dev_id);
+ aml_append(dev, aml_name_decl("_HID", aml_string("HPRE07")));
+ aml_append(dev, aml_name_decl("_UID", aml_int(dev_id)));
+ aml_append(dev, aml_name_decl("_CCA", aml_int(1)));
+
+ Aml *crs = aml_resource_template();
+
+ aml_append(crs, aml_memory32_fixed(base, size, AML_READ_WRITE));
+ aml_append(dev, aml_name_decl("_CRS", crs));
+ aml_append(scope, dev);
+}
+
static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap,
uint32_t irq, VirtMachineState *vms)
{
@@ -1201,6 +1250,15 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
acpi_dsdt_add_virtio(scope, &memmap[VIRT_MMIO],
(irqmap[VIRT_MMIO] + ARM_SPI_BASE), NUM_VIRTIO_TRANSPORTS);
acpi_dsdt_add_pci(scope, memmap, irqmap[VIRT_PCIE] + ARM_SPI_BASE, vms);
+
+ if (virtcca_cvm_enabled()) {
+ int kae_num = tmm_get_kae_num();
+ for (int i = 0; i < kae_num; i++) {
+ acpi_dsdt_add_hisi_sec(scope, &memmap[VIRT_KAE_DEVICE], i);
+ acpi_dsdt_add_hisi_hpre(scope, &memmap[VIRT_KAE_DEVICE], i);
+ }
+ }
+
if (vms->acpi_dev) {
build_ged_aml(scope, "\\_SB."GED_DEVICE,
HOTPLUG_HANDLER(vms->acpi_dev),
--
2.41.0.windows.1

View File

@ -0,0 +1,150 @@
From 0978556247d968ffc83beff3b2611c93fd9b6b13 Mon Sep 17 00:00:00 2001
From: Yi Liu <yi.l.liu@intel.com>
Date: Thu, 12 Sep 2024 00:17:31 -0700
Subject: [PATCH] backend/iommufd: Report PASID capability
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
backends/iommufd.c | 4 +++-
hw/arm/smmu-common.c | 4 ++--
hw/arm/smmuv3.c | 4 +++-
hw/vfio/iommufd.c | 4 +++-
include/hw/arm/smmu-common.h | 2 +-
include/sysemu/host_iommu_device.h | 1 +
include/sysemu/iommufd.h | 3 ++-
7 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/backends/iommufd.c b/backends/iommufd.c
index e9ce82297b..4f5df63331 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -326,7 +326,8 @@ bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be,
bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
uint32_t *type, void *data, uint32_t len,
- uint64_t *caps, Error **errp)
+ uint64_t *caps, uint8_t *max_pasid_log2,
+ Error **errp)
{
struct iommu_hw_info info = {
.size = sizeof(info),
@@ -344,6 +345,7 @@ bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
*type = info.out_data_type;
g_assert(caps);
*caps = info.out_capabilities;
+ *max_pasid_log2 = info.out_max_pasid_log2;
return true;
}
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index c382fa16e5..e7028bd4ec 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -853,7 +853,7 @@ SMMUDevice *smmu_find_sdev(SMMUState *s, uint32_t sid)
/* IOMMUFD helpers */
int smmu_dev_get_info(SMMUDevice *sdev, uint32_t *data_type,
- uint32_t data_len, void *data)
+ uint32_t data_len, uint8_t *pasid, void *data)
{
uint64_t caps;
@@ -863,7 +863,7 @@ int smmu_dev_get_info(SMMUDevice *sdev, uint32_t *data_type,
return !iommufd_backend_get_device_info(sdev->idev->iommufd,
sdev->idev->devid, data_type, data,
- data_len, &caps, NULL);
+ data_len, &caps, pasid, NULL);
}
void smmu_dev_uninstall_nested_ste(SMMUDevice *sdev, bool abort)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 30c0ae4c3b..0ca0e96fcc 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -264,6 +264,7 @@ static void smmuv3_nested_init_regs(SMMUv3State *s)
SMMUDevice *sdev;
uint32_t data_type;
uint32_t val;
+ uint8_t pasid;
int ret;
if (!bs->nested || !bs->viommu) {
@@ -280,7 +281,8 @@ static void smmuv3_nested_init_regs(SMMUv3State *s)
goto out;
}
- ret = smmu_dev_get_info(sdev, &data_type, sizeof(sdev->info), &sdev->info);
+ ret = smmu_dev_get_info(sdev, &data_type, sizeof(sdev->info), &pasid,
+ &sdev->info);
if (ret) {
error_report("failed to get SMMU device info");
return;
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index c0eb87c78c..a108beda29 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -871,18 +871,20 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
struct iommu_hw_info_vtd vtd;
} data;
uint64_t hw_caps;
+ uint8_t pasids;
hiod->agent = opaque;
if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid,
&type, &data, sizeof(data),
- &hw_caps, errp)) {
+ &hw_caps, &pasids, errp)) {
return false;
}
hiod->name = g_strdup(vdev->name);
caps->type = type;
caps->hw_caps = hw_caps;
+ caps->max_pasid_log2 = pasids;
return true;
}
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
index 087a11efc7..8ae33c3753 100644
--- a/include/hw/arm/smmu-common.h
+++ b/include/hw/arm/smmu-common.h
@@ -276,7 +276,7 @@ void smmu_inv_notifiers_all(SMMUState *s);
/* IOMMUFD helpers */
int smmu_dev_get_info(SMMUDevice *sdev, uint32_t *data_type,
- uint32_t data_len, void *data);
+ uint32_t data_len, uint8_t *pasid, void *data);
void smmu_dev_uninstall_nested_ste(SMMUDevice *sdev, bool abort);
int smmu_dev_install_nested_ste(SMMUDevice *sdev, uint32_t data_type,
uint32_t data_len, void *data,
diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h
index 84131f5495..22c76a37a7 100644
--- a/include/sysemu/host_iommu_device.h
+++ b/include/sysemu/host_iommu_device.h
@@ -26,6 +26,7 @@
typedef struct HostIOMMUDeviceCaps {
uint32_t type;
uint64_t hw_caps;
+ uint8_t max_pasid_log2;
} HostIOMMUDeviceCaps;
#define TYPE_HOST_IOMMU_DEVICE "host-iommu-device"
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
index b279184974..29afaa429d 100644
--- a/include/sysemu/iommufd.h
+++ b/include/sysemu/iommufd.h
@@ -57,7 +57,8 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
hwaddr iova, ram_addr_t size);
bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
uint32_t *type, void *data, uint32_t len,
- uint64_t *caps, Error **errp);
+ uint64_t *caps, uint8_t *max_pasid_log2,
+ Error **errp);
bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id,
uint32_t pt_id, uint32_t flags,
uint32_t data_type, uint32_t data_len,
--
2.41.0.windows.1

View File

@ -0,0 +1,162 @@
From 626698a1e9edff6a1032f496858555e1a4614fbe Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
Date: Wed, 5 Jun 2024 16:30:27 +0800
Subject: [PATCH] backends: Introduce HostIOMMUDevice abstract
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
A HostIOMMUDevice is an abstraction for an assigned device that is protected
by a physical IOMMU (aka host IOMMU). The userspace interaction with this
physical IOMMU can be done either through the VFIO IOMMU type 1 legacy
backend or the new iommufd backend. The assigned device can be a VFIO device
or a VDPA device. The HostIOMMUDevice is needed to interact with the host
IOMMU that protects the assigned device. It is especially useful when the
device is also protected by a virtual IOMMU as this latter use the translation
services of the physical IOMMU and is constrained by it. In that context the
HostIOMMUDevice can be passed to the virtual IOMMU to collect physical IOMMU
capabilities such as the supported address width. In the future, the virtual
IOMMU will use the HostIOMMUDevice to program the guest page tables in the
first translation stage of the physical IOMMU.
Introduce .realize() to initialize HostIOMMUDevice further after instance init.
Suggested-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
---
MAINTAINERS | 2 ++
backends/host_iommu_device.c | 33 +++++++++++++++++++
backends/meson.build | 1 +
include/sysemu/host_iommu_device.h | 53 ++++++++++++++++++++++++++++++
4 files changed, 89 insertions(+)
create mode 100644 backends/host_iommu_device.c
create mode 100644 include/sysemu/host_iommu_device.h
diff --git a/MAINTAINERS b/MAINTAINERS
index 0ddb20a35f..ada87bfa9e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2174,6 +2174,8 @@ M: Zhenzhong Duan <zhenzhong.duan@intel.com>
S: Supported
F: backends/iommufd.c
F: include/sysemu/iommufd.h
+F: backends/host_iommu_device.c
+F: include/sysemu/host_iommu_device.h
F: include/qemu/chardev_open.h
F: util/chardev_open.c
F: docs/devel/vfio-iommufd.rst
diff --git a/backends/host_iommu_device.c b/backends/host_iommu_device.c
new file mode 100644
index 0000000000..8f2dda1beb
--- /dev/null
+++ b/backends/host_iommu_device.c
@@ -0,0 +1,33 @@
+/*
+ * Host IOMMU device abstract
+ *
+ * Copyright (C) 2024 Intel Corporation.
+ *
+ * Authors: Zhenzhong Duan <zhenzhong.duan@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/host_iommu_device.h"
+
+OBJECT_DEFINE_ABSTRACT_TYPE(HostIOMMUDevice,
+ host_iommu_device,
+ HOST_IOMMU_DEVICE,
+ OBJECT)
+
+static void host_iommu_device_class_init(ObjectClass *oc, void *data)
+{
+}
+
+static void host_iommu_device_init(Object *obj)
+{
+}
+
+static void host_iommu_device_finalize(Object *obj)
+{
+ HostIOMMUDevice *hiod = HOST_IOMMU_DEVICE(obj);
+
+ g_free(hiod->name);
+}
diff --git a/backends/meson.build b/backends/meson.build
index 9a5cea480d..68b5e34e04 100644
--- a/backends/meson.build
+++ b/backends/meson.build
@@ -13,6 +13,7 @@ system_ss.add([files(
system_ss.add(when: 'CONFIG_POSIX', if_true: files('rng-random.c'))
system_ss.add(when: 'CONFIG_POSIX', if_true: files('hostmem-file.c'))
system_ss.add(when: 'CONFIG_LINUX', if_true: files('hostmem-memfd.c'))
+system_ss.add(when: 'CONFIG_LINUX', if_true: files('host_iommu_device.c'))
if keyutils.found()
system_ss.add(keyutils, files('cryptodev-lkcf.c'))
endif
diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h
new file mode 100644
index 0000000000..db47a16189
--- /dev/null
+++ b/include/sysemu/host_iommu_device.h
@@ -0,0 +1,53 @@
+/*
+ * Host IOMMU device abstract declaration
+ *
+ * Copyright (C) 2024 Intel Corporation.
+ *
+ * Authors: Zhenzhong Duan <zhenzhong.duan@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef HOST_IOMMU_DEVICE_H
+#define HOST_IOMMU_DEVICE_H
+
+#include "qom/object.h"
+#include "qapi/error.h"
+
+#define TYPE_HOST_IOMMU_DEVICE "host-iommu-device"
+OBJECT_DECLARE_TYPE(HostIOMMUDevice, HostIOMMUDeviceClass, HOST_IOMMU_DEVICE)
+
+struct HostIOMMUDevice {
+ Object parent_obj;
+
+ char *name;
+};
+
+/**
+ * struct HostIOMMUDeviceClass - The base class for all host IOMMU devices.
+ *
+ * Different types of host devices (e.g., VFIO or VDPA device) or devices
+ * with different backend (e.g., VFIO legacy container or IOMMUFD backend)
+ * will have different implementations of the HostIOMMUDeviceClass.
+ */
+struct HostIOMMUDeviceClass {
+ ObjectClass parent_class;
+
+ /**
+ * @realize: initialize host IOMMU device instance further.
+ *
+ * Mandatory callback.
+ *
+ * @hiod: pointer to a host IOMMU device instance.
+ *
+ * @opaque: pointer to agent device of this host IOMMU device,
+ * e.g., VFIO base device or VDPA device.
+ *
+ * @errp: pass an Error out when realize fails.
+ *
+ * Returns: true on success, false on failure.
+ */
+ bool (*realize)(HostIOMMUDevice *hiod, void *opaque, Error **errp);
+};
+#endif
--
2.41.0.windows.1

View File

@ -0,0 +1,113 @@
From bc08940ad3c75da49e05c596f79e9e0164573709 Mon Sep 17 00:00:00 2001
From: gongchangsui <gongchangsui@outlook.com>
Date: Mon, 17 Mar 2025 02:56:40 -0400
Subject: [PATCH] backends: VirtCCA: cvm_gpa_start supports both 1GB and 3GB
For TMM versions 2.1 and above, `cvm_gpa_start` is 1GB, while for
versions prior to 2.1, `cvm_gpa_start` is 3GB. Shared huge page memory
supports both `cvm_gpa_start` values.
Signed-off-by: gongchangsui <gongchangsui@outlook.com>
---
backends/hostmem-file.c | 17 ++++++++++++++---
hw/arm/virt.c | 1 +
hw/core/numa.c | 2 +-
include/exec/memory.h | 11 +++++++----
4 files changed, 23 insertions(+), 8 deletions(-)
diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
index 891fe4ac4a..ce63a372a3 100644
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -27,6 +27,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(HostMemoryBackendFile, MEMORY_BACKEND_FILE)
bool virtcca_shared_hugepage_mapped = false;
uint64_t virtcca_cvm_ram_size = 0;
+uint64_t virtcca_cvm_gpa_start = 0;
struct HostMemoryBackendFile {
HostMemoryBackend parent_obj;
@@ -101,8 +102,16 @@ virtcca_shared_backend_memory_alloc(char *mem_path, uint32_t ram_flags, Error **
error_report("parse virtcca share memory path failed");
exit(1);
}
- if (virtcca_cvm_ram_size >= VIRTCCA_SHARED_HUGEPAGE_MAX_SIZE) {
- size = VIRTCCA_SHARED_HUGEPAGE_MAX_SIZE;
+
+ /*
+ * 1) CVM_GPA_START = 3GB --> fix size = 1GB
+ * 2) CVM_GPA_START = 1GB && ram_size >= 3GB --> size = 3GB
+ * 3) CVM_GPA_START = 1GB && ram_size < 3GB --> size = ram_size
+ */
+ if (virtcca_cvm_gpa_start != DEFAULT_VM_GPA_START) {
+ size = VIRTCCA_SHARED_HUGEPAGE_ADDR_LIMIT - virtcca_cvm_gpa_start;
+ } else if (virtcca_cvm_ram_size >= VIRTCCA_SHARED_HUGEPAGE_ADDR_LIMIT - DEFAULT_VM_GPA_START) {
+ size = VIRTCCA_SHARED_HUGEPAGE_ADDR_LIMIT - DEFAULT_VM_GPA_START;
}
virtcca_shared_hugepage = g_new(MemoryRegion, 1);
@@ -172,7 +181,9 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
fb->mem_path, fb->offset, errp);
g_free(name);
- if (virtcca_cvm_enabled() && backend->share && !virtcca_shared_hugepage_mapped) {
+ if (virtcca_cvm_enabled() && backend->share &&
+ (strcmp(fb->mem_path, "/dev/shm") != 0) &&
+ !virtcca_shared_hugepage_mapped) {
virtcca_shared_backend_memory_alloc(fb->mem_path, ram_flags, errp);
}
#endif
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 6c5611826c..3c31d3667e 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2063,6 +2063,7 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits)
if (tmi_version < MIN_TMI_VERSION_FOR_UEFI_BOOTED_CVM) {
vms->memmap[VIRT_MEM].base = 3 * GiB;
}
+ virtcca_cvm_gpa_start = vms->memmap[VIRT_MEM].base;
vms->memmap[VIRT_MEM].size = ms->ram_size;
info_report("[qemu] fix VIRT_MEM range 0x%llx - 0x%llx\n", (unsigned long long)(vms->memmap[VIRT_MEM].base),
(unsigned long long)(vms->memmap[VIRT_MEM].base + ms->ram_size));
diff --git a/hw/core/numa.c b/hw/core/numa.c
index c691578ef5..98d896e687 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -655,7 +655,7 @@ static void virtcca_shared_memory_configuration(MachineState *ms)
memory_region_init_alias(alias_mr, NULL, "alias-mr", virtcca_shared_hugepage,
0, int128_get64(virtcca_shared_hugepage->size));
memory_region_add_subregion(address_space_virtcca_shared_memory.root,
- VIRTCCA_GPA_START, alias_mr);
+ virtcca_cvm_gpa_start, alias_mr);
}
void numa_complete_configuration(MachineState *ms)
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 33778f5c64..c14dc69d27 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -243,14 +243,17 @@ typedef struct IOMMUTLBEvent {
/* RAM FD is opened read-only */
#define RAM_READONLY_FD (1 << 11)
-/* The GPA range of the VirtCCA bounce buffer is from 1GB to 4GB. */
-#define VIRTCCA_SHARED_HUGEPAGE_MAX_SIZE 0xc0000000ULL
+/* The address limit of the VirtCCA bounce buffer is 4GB. */
+#define VIRTCCA_SHARED_HUGEPAGE_ADDR_LIMIT 0x100000000ULL
/* The VirtCCA shared hugepage memory granularity is 1GB */
#define VIRTCCA_SHARED_HUGEPAGE_ALIGN 0x40000000ULL
-/* The GPA starting address of the VirtCCA CVM is 1GB */
-#define VIRTCCA_GPA_START 0x40000000ULL
+/* The default GPA starting address of VM is 1GB */
+#define DEFAULT_VM_GPA_START 0x40000000ULL
+
+/* The GPA starting address of the VirtCCA CVM is 1GB or 3GB */
+extern uint64_t virtcca_cvm_gpa_start;
extern uint64_t virtcca_cvm_ram_size;
--
2.41.0.windows.1

View File

@ -0,0 +1,71 @@
From 29080940b37ce7486a46ab5534383321319fe2c5 Mon Sep 17 00:00:00 2001
From: gubin <gubin_yewu@cmss.chinamobile.com>
Date: Sat, 22 Mar 2025 15:10:32 +0800
Subject: [PATCH] backends/cryptodev: Do not abort for invalid session ID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
cherry-pick from eaf2bd29538d039df80bb4b1584de33a61312bc6
Instead of aborting when a session ID is invalid,
return VIRTIO_CRYPTO_INVSESS ("Invalid session id").
Reproduced using:
$ cat << EOF | qemu-system-i386 -display none \
-machine q35,accel=qtest -m 512M -nodefaults \
-object cryptodev-backend-builtin,id=cryptodev0 \
-device virtio-crypto-pci,id=crypto0,cryptodev=cryptodev0 \
-qtest stdio
outl 0xcf8 0x80000804
outw 0xcfc 0x06
outl 0xcf8 0x80000820
outl 0xcfc 0xe0008000
write 0x10800e 0x1 0x01
write 0xe0008016 0x1 0x01
write 0xe0008020 0x4 0x00801000
write 0xe0008028 0x4 0x00c01000
write 0xe000801c 0x1 0x01
write 0x110000 0x1 0x05
write 0x110001 0x1 0x04
write 0x108002 0x1 0x11
write 0x108008 0x1 0x48
write 0x10800c 0x1 0x01
write 0x108018 0x1 0x10
write 0x10801c 0x1 0x02
write 0x10c002 0x1 0x01
write 0xe000b005 0x1 0x00
EOF
Assertion failed: (session_id < MAX_NUM_SESSIONS && builtin->sessions[session_id]),
function cryptodev_builtin_close_session, file cryptodev-builtin.c, line 430.
Cc: qemu-stable@nongnu.org
Reported-by: Zheyu Ma <zheyuma97@gmail.com>
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2274
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: zhenwei pi <pizhenwei@bytedance.com>
Message-Id: <20240409094757.9127-1-philmd@linaro.org>
Signed-off-by: gubin <gubin_yewu@cmss.chinamobile.com>
---
backends/cryptodev-builtin.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/backends/cryptodev-builtin.c b/backends/cryptodev-builtin.c
index 0822f198d9..940104ee55 100644
--- a/backends/cryptodev-builtin.c
+++ b/backends/cryptodev-builtin.c
@@ -428,7 +428,9 @@ static int cryptodev_builtin_close_session(
CRYPTODEV_BACKEND_BUILTIN(backend);
CryptoDevBackendBuiltinSession *session;
- assert(session_id < MAX_NUM_SESSIONS && builtin->sessions[session_id]);
+ if (session_id >= MAX_NUM_SESSIONS || !builtin->sessions[session_id]) {
+ return -VIRTIO_CRYPTO_INVSESS;
+ }
session = builtin->sessions[session_id];
if (session->cipher) {
--
2.41.0.windows.1

View File

@ -0,0 +1,65 @@
From 690812903469db798ebae012248b9231d5ce9f11 Mon Sep 17 00:00:00 2001
From: gubin <gubin_yewu@cmss.chinamobile.com>
Date: Sat, 22 Mar 2025 15:15:08 +0800
Subject: [PATCH] backends/cryptodev: Do not ignore throttle/backends Errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
cherry-pick from 484aecf2d3a75251b63481be2a0c3aef635002af
Both cryptodev_backend_set_throttle() and CryptoDevBackendClass::init()
can set their Error** argument. Do not ignore them, return early
on failure. Without that, running into another failure trips
error_setv()'s assertion. Use the ERRP_GUARD() macro as suggested
in commit ae7c80a7bd ("error: New macro ERRP_GUARD()").
Cc: qemu-stable@nongnu.org
Fixes: e7a775fd9f ("cryptodev: Account statistics")
Fixes: 2580b452ff ("cryptodev: support QoS")
Reviewed-by: zhenwei pi <pizhenwei@bytedance.com>
Reviewed-by: Gonglei <arei.gonglei@huawei.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-Id: <20231120150418.93443-1-philmd@linaro.org>
Signed-off-by: gubin <gubin_yewu@cmss.chinamobile.com>
---
backends/cryptodev.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/backends/cryptodev.c b/backends/cryptodev.c
index e5006bd215..fff89fd62a 100644
--- a/backends/cryptodev.c
+++ b/backends/cryptodev.c
@@ -398,6 +398,7 @@ static void cryptodev_backend_set_ops(Object *obj, Visitor *v,
static void
cryptodev_backend_complete(UserCreatable *uc, Error **errp)
{
+ ERRP_GUARD();
CryptoDevBackend *backend = CRYPTODEV_BACKEND(uc);
CryptoDevBackendClass *bc = CRYPTODEV_BACKEND_GET_CLASS(uc);
uint32_t services;
@@ -406,11 +407,20 @@ cryptodev_backend_complete(UserCreatable *uc, Error **errp)
QTAILQ_INIT(&backend->opinfos);
value = backend->tc.buckets[THROTTLE_OPS_TOTAL].avg;
cryptodev_backend_set_throttle(backend, THROTTLE_OPS_TOTAL, value, errp);
+ if (*errp) {
+ return;
+ }
value = backend->tc.buckets[THROTTLE_BPS_TOTAL].avg;
cryptodev_backend_set_throttle(backend, THROTTLE_BPS_TOTAL, value, errp);
+ if (*errp) {
+ return;
+ }
if (bc->init) {
bc->init(backend, errp);
+ if (*errp) {
+ return;
+ }
}
services = backend->conf.crypto_services;
--
2.41.0.windows.1

View File

@ -0,0 +1,91 @@
From ca210a4a8fe97dd56baa184671bb48bff9a54ecb Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
Date: Wed, 5 Jun 2024 16:30:28 +0800
Subject: [PATCH] backends/host_iommu_device: Introduce HostIOMMUDeviceCaps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
HostIOMMUDeviceCaps's elements map to the host IOMMU's capabilities.
Different platform IOMMU can support different elements.
Currently there are only two elements, type and aw_bits: type hints the
host platform IOMMU type, e.g., Intel VT-d, ARM SMMU, etc.; aw_bits hints
the host IOMMU address width.
Introduce .get_cap() handler to check if HOST_IOMMU_DEVICE_CAP_XXX
is supported.
Suggested-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
---
include/sysemu/host_iommu_device.h | 38 ++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)
diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h
index db47a16189..a57873958b 100644
--- a/include/sysemu/host_iommu_device.h
+++ b/include/sysemu/host_iommu_device.h
@@ -15,6 +15,18 @@
#include "qom/object.h"
#include "qapi/error.h"
+/**
+ * struct HostIOMMUDeviceCaps - Define host IOMMU device capabilities.
+ *
+ * @type: host platform IOMMU type.
+ *
+ * @aw_bits: host IOMMU address width. 0xff if no limitation.
+ */
+typedef struct HostIOMMUDeviceCaps {
+ uint32_t type;
+ uint8_t aw_bits;
+} HostIOMMUDeviceCaps;
+
#define TYPE_HOST_IOMMU_DEVICE "host-iommu-device"
OBJECT_DECLARE_TYPE(HostIOMMUDevice, HostIOMMUDeviceClass, HOST_IOMMU_DEVICE)
@@ -22,6 +34,7 @@ struct HostIOMMUDevice {
Object parent_obj;
char *name;
+ HostIOMMUDeviceCaps caps;
};
/**
@@ -49,5 +62,30 @@ struct HostIOMMUDeviceClass {
* Returns: true on success, false on failure.
*/
bool (*realize)(HostIOMMUDevice *hiod, void *opaque, Error **errp);
+ /**
+ * @get_cap: check if a host IOMMU device capability is supported.
+ *
+ * Optional callback, if not implemented, hint not supporting query
+ * of @cap.
+ *
+ * @hiod: pointer to a host IOMMU device instance.
+ *
+ * @cap: capability to check.
+ *
+ * @errp: pass an Error out when fails to query capability.
+ *
+ * Returns: <0 on failure, 0 if a @cap is unsupported, or else
+ * 1 or some positive value for some special @cap,
+ * i.e., HOST_IOMMU_DEVICE_CAP_AW_BITS.
+ */
+ int (*get_cap)(HostIOMMUDevice *hiod, int cap, Error **errp);
};
+
+/*
+ * Host IOMMU device capability list.
+ */
+#define HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE 0
+#define HOST_IOMMU_DEVICE_CAP_AW_BITS 1
+
+#define HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX 64
#endif
--
2.41.0.windows.1

View File

@ -0,0 +1,81 @@
From cedca4d3635cde049151b5818df2cb66c2b1531f Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
Date: Fri, 3 Nov 2023 16:54:01 +0800
Subject: [PATCH] backends/iommufd: Add helpers for invalidating user-managed
HWPT
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
backends/iommufd.c | 30 ++++++++++++++++++++++++++++++
backends/trace-events | 1 +
include/sysemu/iommufd.h | 3 +++
3 files changed, 34 insertions(+)
diff --git a/backends/iommufd.c b/backends/iommufd.c
index c1260766f0..cf24370385 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -330,6 +330,36 @@ bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
return true;
}
+int iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t hwpt_id,
+ uint32_t data_type, uint32_t entry_len,
+ uint32_t *entry_num, void *data_ptr)
+{
+ int ret, fd = be->fd;
+ struct iommu_hwpt_invalidate cache = {
+ .size = sizeof(cache),
+ .hwpt_id = hwpt_id,
+ .data_type = data_type,
+ .entry_len = entry_len,
+ .entry_num = *entry_num,
+ .data_uptr = (uintptr_t)data_ptr,
+ };
+
+ ret = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cache);
+
+ trace_iommufd_backend_invalidate_cache(fd, hwpt_id, data_type, entry_len,
+ *entry_num, cache.entry_num,
+ (uintptr_t)data_ptr, ret);
+ if (ret) {
+ *entry_num = cache.entry_num;
+ error_report("IOMMU_HWPT_INVALIDATE failed: %s", strerror(errno));
+ ret = -errno;
+ } else {
+ g_assert(*entry_num == cache.entry_num);
+ }
+
+ return ret;
+}
+
static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp)
{
HostIOMMUDeviceCaps *caps = &hiod->caps;
diff --git a/backends/trace-events b/backends/trace-events
index b02433710a..ef0ff98921 100644
--- a/backends/trace-events
+++ b/backends/trace-events
@@ -18,3 +18,4 @@ iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_
iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)"
iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%u enable=%d (%d)"
iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t iova, uint64_t size, uint64_t page_size, int ret) " iommufd=%d hwpt=%u iova=0x%"PRIx64" size=0x%"PRIx64" page_size=0x%"PRIx64" (%d)"
+iommufd_backend_invalidate_cache(int iommufd, uint32_t hwpt_id, uint32_t data_type, uint32_t entry_len, uint32_t entry_num, uint32_t done_num, uint64_t data_ptr, int ret) " iommufd=%d hwpt_id=%u data_type=%u entry_len=%u entry_num=%u done_num=%u data_ptr=0x%"PRIx64" (%d)"
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
index 3b28c8a81c..f6596f6338 100644
--- a/include/sysemu/iommufd.h
+++ b/include/sysemu/iommufd.h
@@ -63,6 +63,9 @@ bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
uint64_t iova, ram_addr_t size,
uint64_t page_size, uint64_t *data,
Error **errp);
+int iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t hwpt_id,
+ uint32_t data_type, uint32_t entry_len,
+ uint32_t *entry_num, void *data_ptr);
#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
#endif
--
2.41.0.windows.1

View File

@ -0,0 +1,78 @@
From 7d53d0938921d0faa32e1fef4c7bcc45d21f9bfb Mon Sep 17 00:00:00 2001
From: Joao Martins <joao.m.martins@oracle.com>
Date: Fri, 19 Jul 2024 13:04:51 +0100
Subject: [PATCH] backends/iommufd: Extend iommufd_backend_get_device_info() to
fetch HW capabilities
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The helper will be able to fetch vendor agnostic IOMMU capabilities
supported both by hardware and software. Right now it is only iommu dirty
tracking.
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
---
backends/iommufd.c | 4 +++-
hw/vfio/iommufd.c | 4 +++-
include/sysemu/iommufd.h | 2 +-
3 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/backends/iommufd.c b/backends/iommufd.c
index 7e805bd664..1ce2a24226 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -225,7 +225,7 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
uint32_t *type, void *data, uint32_t len,
- Error **errp)
+ uint64_t *caps, Error **errp)
{
struct iommu_hw_info info = {
.size = sizeof(info),
@@ -241,6 +241,8 @@ bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
g_assert(type);
*type = info.out_data_type;
+ g_assert(caps);
+ *caps = info.out_capabilities;
return true;
}
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 7cbf0e44f1..d5b923ca83 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -647,9 +647,11 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
union {
struct iommu_hw_info_vtd vtd;
} data;
+ uint64_t hw_caps;
if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid,
- &type, &data, sizeof(data), errp)) {
+ &type, &data, sizeof(data),
+ &hw_caps, errp)) {
return false;
}
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
index dfade18e6d..a0a0143856 100644
--- a/include/sysemu/iommufd.h
+++ b/include/sysemu/iommufd.h
@@ -51,7 +51,7 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
hwaddr iova, ram_addr_t size);
bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
uint32_t *type, void *data, uint32_t len,
- Error **errp);
+ uint64_t *caps, Error **errp);
#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
#endif
--
2.41.0.windows.1

View File

@ -0,0 +1,61 @@
From 2f1a2f4b320e70a85cef8392cd5f4b1e54afb9c9 Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
Date: Wed, 5 Jun 2024 16:30:36 +0800
Subject: [PATCH] backends/iommufd: Implement HostIOMMUDeviceClass::get_cap()
handler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Suggested-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
---
backends/iommufd.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/backends/iommufd.c b/backends/iommufd.c
index 604a8f4e7d..7e805bd664 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -245,6 +245,28 @@ bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
return true;
}
+static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp)
+{
+ HostIOMMUDeviceCaps *caps = &hiod->caps;
+
+ switch (cap) {
+ case HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE:
+ return caps->type;
+ case HOST_IOMMU_DEVICE_CAP_AW_BITS:
+ return caps->aw_bits;
+ default:
+ error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
+ return -EINVAL;
+ }
+}
+
+static void hiod_iommufd_class_init(ObjectClass *oc, void *data)
+{
+ HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc);
+
+ hioc->get_cap = hiod_iommufd_get_cap;
+};
+
static const TypeInfo types[] = {
{
.name = TYPE_IOMMUFD_BACKEND,
@@ -261,6 +283,7 @@ static const TypeInfo types[] = {
}, {
.name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD,
.parent = TYPE_HOST_IOMMU_DEVICE,
+ .class_init = hiod_iommufd_class_init,
.abstract = true,
}
};
--
2.41.0.windows.1

View File

@ -0,0 +1,158 @@
From 50142057ec070a70f3f38ec272ec61cc3ae6e071 Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
Date: Wed, 5 Jun 2024 16:30:30 +0800
Subject: [PATCH] backends/iommufd: Introduce
TYPE_HOST_IOMMU_DEVICE_IOMMUFD[_VFIO] devices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
TYPE_HOST_IOMMU_DEVICE_IOMMUFD represents a host IOMMU device under
iommufd backend. It is abstract, because it is going to be derived
into VFIO or VDPA type'd device.
It will have its own .get_cap() implementation.
TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO is a sub-class of
TYPE_HOST_IOMMU_DEVICE_IOMMUFD, represents a VFIO type'd host IOMMU
device under iommufd backend. It will be created during VFIO device
attaching and passed to vIOMMU.
It will have its own .realize() implementation.
Opportunistically, add missed header to include/sysemu/iommufd.h.
Suggested-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
---
backends/iommufd.c | 36 +++++++++++++++++------------------
hw/vfio/iommufd.c | 5 ++++-
include/hw/vfio/vfio-common.h | 3 +++
include/sysemu/iommufd.h | 16 ++++++++++++++++
4 files changed, 41 insertions(+), 19 deletions(-)
diff --git a/backends/iommufd.c b/backends/iommufd.c
index ba58a0eb0d..a2b7f5c3c4 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -223,23 +223,23 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
return ret;
}
-static const TypeInfo iommufd_backend_info = {
- .name = TYPE_IOMMUFD_BACKEND,
- .parent = TYPE_OBJECT,
- .instance_size = sizeof(IOMMUFDBackend),
- .instance_init = iommufd_backend_init,
- .instance_finalize = iommufd_backend_finalize,
- .class_size = sizeof(IOMMUFDBackendClass),
- .class_init = iommufd_backend_class_init,
- .interfaces = (InterfaceInfo[]) {
- { TYPE_USER_CREATABLE },
- { }
+static const TypeInfo types[] = {
+ {
+ .name = TYPE_IOMMUFD_BACKEND,
+ .parent = TYPE_OBJECT,
+ .instance_size = sizeof(IOMMUFDBackend),
+ .instance_init = iommufd_backend_init,
+ .instance_finalize = iommufd_backend_finalize,
+ .class_size = sizeof(IOMMUFDBackendClass),
+ .class_init = iommufd_backend_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_USER_CREATABLE },
+ { }
+ }
+ }, {
+ .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD,
+ .parent = TYPE_HOST_IOMMU_DEVICE,
+ .abstract = true,
}
};
-
-static void register_types(void)
-{
- type_register_static(&iommufd_backend_info);
-}
-
-type_init(register_types);
+DEFINE_TYPES(types)
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index d4c586e842..7a4b818830 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -641,7 +641,10 @@ static const TypeInfo types[] = {
.name = TYPE_VFIO_IOMMU_IOMMUFD,
.parent = TYPE_VFIO_IOMMU,
.class_init = vfio_iommu_iommufd_class_init,
- },
+ }, {
+ .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO,
+ .parent = TYPE_HOST_IOMMU_DEVICE_IOMMUFD,
+ }
};
DEFINE_TYPES(types)
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 0c807c2806..2cfc8521cd 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -32,6 +32,7 @@
#include "sysemu/sysemu.h"
#include "hw/vfio/vfio-container-base.h"
#include "sysemu/host_iommu_device.h"
+#include "sysemu/iommufd.h"
#define VFIO_MSG_PREFIX "vfio %s: "
@@ -77,6 +78,8 @@ typedef struct VFIOMigration {
struct VFIOGroup;
#define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-vfio"
+#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO \
+ TYPE_HOST_IOMMU_DEVICE_IOMMUFD "-vfio"
typedef struct VFIODMARange {
QLIST_ENTRY(VFIODMARange) next;
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
index 9c5524b0ed..1a75e82f42 100644
--- a/include/sysemu/iommufd.h
+++ b/include/sysemu/iommufd.h
@@ -1,3 +1,16 @@
+/*
+ * iommufd container backend declaration
+ *
+ * Copyright (C) 2024 Intel Corporation.
+ * Copyright Red Hat, Inc. 2024
+ *
+ * Authors: Yi Liu <yi.l.liu@intel.com>
+ * Eric Auger <eric.auger@redhat.com>
+ * Zhenzhong Duan <zhenzhong.duan@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
#ifndef SYSEMU_IOMMUFD_H
#define SYSEMU_IOMMUFD_H
@@ -5,6 +18,7 @@
#include "qemu/thread.h"
#include "exec/hwaddr.h"
#include "exec/cpu-common.h"
+#include "sysemu/host_iommu_device.h"
#define TYPE_IOMMUFD_BACKEND "iommufd"
OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND)
@@ -35,4 +49,6 @@ int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
ram_addr_t size, void *vaddr, bool readonly);
int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
hwaddr iova, ram_addr_t size);
+
+#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
#endif
--
2.41.0.windows.1

View File

@ -0,0 +1,69 @@
From ccd8baf4648e6fd6b69e65ee249609904edc92e1 Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
Date: Wed, 5 Jun 2024 16:30:33 +0800
Subject: [PATCH] backends/iommufd: Introduce helper function
iommufd_backend_get_device_info()
Introduce a helper function iommufd_backend_get_device_info() to get
host IOMMU related information through iommufd uAPI.
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
---
backends/iommufd.c | 22 ++++++++++++++++++++++
include/sysemu/iommufd.h | 3 +++
2 files changed, 25 insertions(+)
diff --git a/backends/iommufd.c b/backends/iommufd.c
index a2b7f5c3c4..604a8f4e7d 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -223,6 +223,28 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
return ret;
}
+bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
+ uint32_t *type, void *data, uint32_t len,
+ Error **errp)
+{
+ struct iommu_hw_info info = {
+ .size = sizeof(info),
+ .dev_id = devid,
+ .data_len = len,
+ .data_uptr = (uintptr_t)data,
+ };
+
+ if (ioctl(be->fd, IOMMU_GET_HW_INFO, &info)) {
+ error_setg_errno(errp, errno, "Failed to get hardware info");
+ return false;
+ }
+
+ g_assert(type);
+ *type = info.out_data_type;
+
+ return true;
+}
+
static const TypeInfo types[] = {
{
.name = TYPE_IOMMUFD_BACKEND,
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
index 1a75e82f42..dfade18e6d 100644
--- a/include/sysemu/iommufd.h
+++ b/include/sysemu/iommufd.h
@@ -49,6 +49,9 @@ int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
ram_addr_t size, void *vaddr, bool readonly);
int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
hwaddr iova, ram_addr_t size);
+bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
+ uint32_t *type, void *data, uint32_t len,
+ Error **errp);
#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
#endif
--
2.41.0.windows.1

View File

@ -0,0 +1,100 @@
From 207259b8f08e87b4a741a8b7884e699c95641a2e Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Sat, 13 Apr 2024 00:15:17 +0000
Subject: [PATCH] backends/iommufd: Introduce iommufd_backend_alloc_viommu
Add a helper to allocate a viommu object.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
backends/iommufd.c | 35 +++++++++++++++++++++++++++++++++++
backends/trace-events | 1 +
include/sysemu/iommufd.h | 10 ++++++++++
3 files changed, 46 insertions(+)
diff --git a/backends/iommufd.c b/backends/iommufd.c
index c10aa9b011..82368a3918 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -360,6 +360,41 @@ int iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t hwpt_id,
return ret;
}
+struct IOMMUFDViommu *iommufd_backend_alloc_viommu(IOMMUFDBackend *be,
+ uint32_t dev_id,
+ uint32_t viommu_type,
+ uint32_t hwpt_id)
+{
+ int ret, fd = be->fd;
+ struct IOMMUFDViommu *viommu = g_malloc(sizeof(*viommu));
+ struct iommu_viommu_alloc alloc_viommu = {
+ .size = sizeof(alloc_viommu),
+ .type = viommu_type,
+ .dev_id = dev_id,
+ .hwpt_id = hwpt_id,
+ };
+
+ if (!viommu) {
+ error_report("failed to allocate viommu object");
+ return NULL;
+ }
+
+ ret = ioctl(fd, IOMMU_VIOMMU_ALLOC, &alloc_viommu);
+
+ trace_iommufd_backend_alloc_viommu(fd, viommu_type, dev_id, hwpt_id,
+ alloc_viommu.out_viommu_id, ret);
+ if (ret) {
+ error_report("IOMMU_VIOMMU_ALLOC failed: %s", strerror(errno));
+ g_free(viommu);
+ return NULL;
+ }
+
+ viommu->viommu_id = alloc_viommu.out_viommu_id;
+ viommu->s2_hwpt_id = hwpt_id;
+ viommu->iommufd = be;
+ return viommu;
+}
+
bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
uint32_t hwpt_id, Error **errp)
{
diff --git a/backends/trace-events b/backends/trace-events
index ef0ff98921..c24cd378df 100644
--- a/backends/trace-events
+++ b/backends/trace-events
@@ -19,3 +19,4 @@ iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%
iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%u enable=%d (%d)"
iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t iova, uint64_t size, uint64_t page_size, int ret) " iommufd=%d hwpt=%u iova=0x%"PRIx64" size=0x%"PRIx64" page_size=0x%"PRIx64" (%d)"
iommufd_backend_invalidate_cache(int iommufd, uint32_t hwpt_id, uint32_t data_type, uint32_t entry_len, uint32_t entry_num, uint32_t done_num, uint64_t data_ptr, int ret) " iommufd=%d hwpt_id=%u data_type=%u entry_len=%u entry_num=%u done_num=%u data_ptr=0x%"PRIx64" (%d)"
+iommufd_backend_alloc_viommu(int iommufd, uint32_t type, uint32_t dev_id, uint32_t hwpt_id, uint32_t viommu_id, int ret) " iommufd=%d type=%u dev_id=%u hwpt_id=%u viommu_id=%u (%d)"
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
index 3dc6934144..05a08c49c2 100644
--- a/include/sysemu/iommufd.h
+++ b/include/sysemu/iommufd.h
@@ -39,6 +39,12 @@ struct IOMMUFDBackend {
/*< public >*/
};
+typedef struct IOMMUFDViommu {
+ IOMMUFDBackend *iommufd;
+ uint32_t s2_hwpt_id;
+ uint32_t viommu_id;
+} IOMMUFDViommu;
+
int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp);
void iommufd_backend_disconnect(IOMMUFDBackend *be);
@@ -66,6 +72,10 @@ bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
int iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t hwpt_id,
uint32_t data_type, uint32_t entry_len,
uint32_t *entry_num, void *data_ptr);
+struct IOMMUFDViommu *iommufd_backend_alloc_viommu(IOMMUFDBackend *be,
+ uint32_t dev_id,
+ uint32_t viommu_type,
+ uint32_t hwpt_id);
#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
OBJECT_DECLARE_TYPE(HostIOMMUDeviceIOMMUFD, HostIOMMUDeviceIOMMUFDClass,
--
2.41.0.windows.1

View File

@ -0,0 +1,89 @@
From 005b8f4b6cef11982abcc2c071cbe40b69fb22e7 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Sat, 13 Apr 2024 00:21:22 +0000
Subject: [PATCH] backends/iommufd: Introduce iommufd_vdev_alloc
Add a helper to allocate an iommufd device's virtual device (in the user
space) per a viommu instance.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
backends/iommufd.c | 31 +++++++++++++++++++++++++++++++
backends/trace-events | 1 +
include/sysemu/iommufd.h | 11 +++++++++++
3 files changed, 43 insertions(+)
diff --git a/backends/iommufd.c b/backends/iommufd.c
index 82368a3918..af3376d0bf 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -395,6 +395,37 @@ struct IOMMUFDViommu *iommufd_backend_alloc_viommu(IOMMUFDBackend *be,
return viommu;
}
+struct IOMMUFDVdev *iommufd_backend_alloc_vdev(HostIOMMUDeviceIOMMUFD *idev,
+ IOMMUFDViommu *viommu,
+ uint64_t virt_id)
+{
+ int ret, fd = viommu->iommufd->fd;
+ struct IOMMUFDVdev *vdev = g_malloc(sizeof(*vdev));
+ struct iommu_vdevice_alloc alloc_vdev = {
+ .size = sizeof(alloc_vdev),
+ .viommu_id = viommu->viommu_id,
+ .dev_id = idev->devid,
+ .virt_id = virt_id,
+ };
+
+ ret = ioctl(fd, IOMMU_VDEVICE_ALLOC, &alloc_vdev);
+
+ trace_iommufd_backend_alloc_vdev(fd, idev->devid, viommu->viommu_id, virt_id,
+ alloc_vdev.out_vdevice_id, ret);
+
+ if (ret) {
+ error_report("IOMMU_VDEVICE_ALLOC failed: %s", strerror(errno));
+ g_free(vdev);
+ return NULL;
+ }
+
+ vdev->idev = idev;
+ vdev->viommu = viommu;
+ vdev->virt_id = virt_id;
+ vdev->vdev_id = alloc_vdev.out_vdevice_id;
+ return vdev;
+}
+
bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
uint32_t hwpt_id, Error **errp)
{
diff --git a/backends/trace-events b/backends/trace-events
index c24cd378df..e150a37e9a 100644
--- a/backends/trace-events
+++ b/backends/trace-events
@@ -20,3 +20,4 @@ iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) "
iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t iova, uint64_t size, uint64_t page_size, int ret) " iommufd=%d hwpt=%u iova=0x%"PRIx64" size=0x%"PRIx64" page_size=0x%"PRIx64" (%d)"
iommufd_backend_invalidate_cache(int iommufd, uint32_t hwpt_id, uint32_t data_type, uint32_t entry_len, uint32_t entry_num, uint32_t done_num, uint64_t data_ptr, int ret) " iommufd=%d hwpt_id=%u data_type=%u entry_len=%u entry_num=%u done_num=%u data_ptr=0x%"PRIx64" (%d)"
iommufd_backend_alloc_viommu(int iommufd, uint32_t type, uint32_t dev_id, uint32_t hwpt_id, uint32_t viommu_id, int ret) " iommufd=%d type=%u dev_id=%u hwpt_id=%u viommu_id=%u (%d)"
+iommufd_backend_alloc_vdev(int iommufd, uint32_t dev_id, uint32_t viommu_id, uint64_t virt_id, uint32_t vdev_id, int ret) " iommufd=%d dev_id=%u viommu_id=%u virt_id=0x%"PRIx64" vdev_id=%u (%d)"
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
index 05a08c49c2..0284e95460 100644
--- a/include/sysemu/iommufd.h
+++ b/include/sysemu/iommufd.h
@@ -128,4 +128,15 @@ bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
uint32_t hwpt_id, Error **errp);
bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
Error **errp);
+
+typedef struct IOMMUFDVdev {
+ HostIOMMUDeviceIOMMUFD *idev;
+ IOMMUFDViommu *viommu;
+ uint32_t vdev_id;
+ uint64_t virt_id;
+} IOMMUFDVdev;
+
+struct IOMMUFDVdev *iommufd_backend_alloc_vdev(HostIOMMUDeviceIOMMUFD *idev,
+ IOMMUFDViommu *viommu,
+ uint64_t virt_id);
#endif
--
2.41.0.windows.1

View File

@ -0,0 +1,84 @@
From 2be28f75e4ed2a0a35549dd1a545e0655e63973d Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Fri, 12 Apr 2024 23:27:54 +0000
Subject: [PATCH] backends/iommufd: Introduce iommufd_viommu_invalidate_cache
Similar to iommufd_backend_invalidate_cache() for IOTLB invalidation, add a
new helper for viommu-specific cache invalidation. It issues the same
IOMMU_HWPT_INVALIDATE ioctl, passing the viommu ID in the hwpt_id field.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
backends/iommufd.c | 31 +++++++++++++++++++++++++++++++
backends/trace-events | 1 +
include/sysemu/iommufd.h | 3 +++
3 files changed, 35 insertions(+)
diff --git a/backends/iommufd.c b/backends/iommufd.c
index af3376d0bf..ee6f5bcf65 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -426,6 +426,37 @@ struct IOMMUFDVdev *iommufd_backend_alloc_vdev(HostIOMMUDeviceIOMMUFD *idev,
return vdev;
}
+int iommufd_viommu_invalidate_cache(IOMMUFDBackend *be, uint32_t viommu_id,
+ uint32_t data_type, uint32_t entry_len,
+ uint32_t *entry_num, void *data_ptr)
+{
+ int ret, fd = be->fd;
+ struct iommu_hwpt_invalidate cache = {
+ .size = sizeof(cache),
+ .hwpt_id = viommu_id,
+ .data_type = data_type,
+ .entry_len = entry_len,
+ .entry_num = *entry_num,
+ .data_uptr = (uint64_t)data_ptr,
+ };
+
+ ret = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cache);
+
+ trace_iommufd_viommu_invalidate_cache(fd, viommu_id, data_type,
+ entry_len, *entry_num,
+ cache.entry_num,
+ (uint64_t)data_ptr, ret);
+ if (ret) {
+ *entry_num = cache.entry_num;
+ error_report("IOMMU_VIOMMU_INVALIDATE failed: %s", strerror(errno));
+ ret = -errno;
+ } else {
+ g_assert(*entry_num == cache.entry_num);
+ }
+
+ return ret;
+}
+
bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
uint32_t hwpt_id, Error **errp)
{
diff --git a/backends/trace-events b/backends/trace-events
index e150a37e9a..f8592a2711 100644
--- a/backends/trace-events
+++ b/backends/trace-events
@@ -21,3 +21,4 @@ iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t iova, u
iommufd_backend_invalidate_cache(int iommufd, uint32_t hwpt_id, uint32_t data_type, uint32_t entry_len, uint32_t entry_num, uint32_t done_num, uint64_t data_ptr, int ret) " iommufd=%d hwpt_id=%u data_type=%u entry_len=%u entry_num=%u done_num=%u data_ptr=0x%"PRIx64" (%d)"
iommufd_backend_alloc_viommu(int iommufd, uint32_t type, uint32_t dev_id, uint32_t hwpt_id, uint32_t viommu_id, int ret) " iommufd=%d type=%u dev_id=%u hwpt_id=%u viommu_id=%u (%d)"
iommufd_backend_alloc_vdev(int iommufd, uint32_t dev_id, uint32_t viommu_id, uint64_t virt_id, uint32_t vdev_id, int ret) " iommufd=%d dev_id=%u viommu_id=%u virt_id=0x%"PRIx64" vdev_id=%u (%d)"
+iommufd_viommu_invalidate_cache(int iommufd, uint32_t viommu_id, uint32_t data_type, uint32_t entry_len, uint32_t entry_num, uint32_t done_num, uint64_t data_ptr, int ret) " iommufd=%d viommu_id=%u data_type=%u entry_len=%u entry_num=%u done_num=%u data_ptr=0x%"PRIx64" (%d)"
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
index 0284e95460..0f2c826036 100644
--- a/include/sysemu/iommufd.h
+++ b/include/sysemu/iommufd.h
@@ -76,6 +76,9 @@ struct IOMMUFDViommu *iommufd_backend_alloc_viommu(IOMMUFDBackend *be,
uint32_t dev_id,
uint32_t viommu_type,
uint32_t hwpt_id);
+int iommufd_viommu_invalidate_cache(IOMMUFDBackend *be, uint32_t viommu_id,
+ uint32_t data_type, uint32_t entry_len,
+ uint32_t *entry_num, void *data_ptr);
#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
OBJECT_DECLARE_TYPE(HostIOMMUDeviceIOMMUFD, HostIOMMUDeviceIOMMUFDClass,
--
2.41.0.windows.1

View File

@ -0,0 +1,468 @@
From 6cb41a55992571dd215fee86ed910bb4d6688bf8 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Sat, 11 Jan 2025 10:52:37 +0800
Subject: [PATCH] backends/iommufd: Introduce the iommufd object
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Introduce an iommufd object which allows the interaction
with the host /dev/iommu device.
The /dev/iommu device may already have been opened outside of qemu,
in which case the fd can be passed directly along with the
iommufd object.
This allows the iommufd object to be shared across several
subsystems (VFIO, VDPA, ...). For example, libvirt would open
the /dev/iommu once.
If no fd is passed along with the iommufd object, the /dev/iommu
is opened by the qemu code.
Suggested-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Zhou Wang <wangzhou1@hisilicon.com>
---
MAINTAINERS | 8 ++
backends/Kconfig | 4 +
backends/iommufd.c | 245 +++++++++++++++++++++++++++++++++++++++
backends/meson.build | 1 +
backends/trace-events | 10 ++
include/sysemu/iommufd.h | 38 ++++++
qapi/qom.json | 19 +++
qemu-options.hx | 12 ++
8 files changed, 337 insertions(+)
create mode 100644 backends/iommufd.c
create mode 100644 include/sysemu/iommufd.h
diff --git a/MAINTAINERS b/MAINTAINERS
index 695e0bd34f..a5a446914a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2167,6 +2167,14 @@ F: hw/vfio/ap.c
F: docs/system/s390x/vfio-ap.rst
L: qemu-s390x@nongnu.org
+iommufd
+M: Yi Liu <yi.l.liu@intel.com>
+M: Eric Auger <eric.auger@redhat.com>
+M: Zhenzhong Duan <zhenzhong.duan@intel.com>
+S: Supported
+F: backends/iommufd.c
+F: include/sysemu/iommufd.h
+
vhost
M: Michael S. Tsirkin <mst@redhat.com>
S: Supported
diff --git a/backends/Kconfig b/backends/Kconfig
index f35abc1609..2cb23f62fa 100644
--- a/backends/Kconfig
+++ b/backends/Kconfig
@@ -1 +1,5 @@
source tpm/Kconfig
+
+config IOMMUFD
+ bool
+ depends on VFIO
diff --git a/backends/iommufd.c b/backends/iommufd.c
new file mode 100644
index 0000000000..ba58a0eb0d
--- /dev/null
+++ b/backends/iommufd.c
@@ -0,0 +1,245 @@
+/*
+ * iommufd container backend
+ *
+ * Copyright (C) 2023 Intel Corporation.
+ * Copyright Red Hat, Inc. 2023
+ *
+ * Authors: Yi Liu <yi.l.liu@intel.com>
+ * Eric Auger <eric.auger@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/iommufd.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/module.h"
+#include "qom/object_interfaces.h"
+#include "qemu/error-report.h"
+#include "monitor/monitor.h"
+#include "trace.h"
+#include <sys/ioctl.h>
+#include <linux/iommufd.h>
+
+static void iommufd_backend_init(Object *obj)
+{
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
+
+ be->fd = -1;
+ be->users = 0;
+ be->owned = true;
+ qemu_mutex_init(&be->lock);
+}
+
+static void iommufd_backend_finalize(Object *obj)
+{
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
+
+ if (be->owned) {
+ close(be->fd);
+ be->fd = -1;
+ }
+}
+
+static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp)
+{
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
+ int fd = -1;
+
+ fd = monitor_fd_param(monitor_cur(), str, errp);
+ if (fd == -1) {
+ error_prepend(errp, "Could not parse remote object fd %s:", str);
+ return;
+ }
+ qemu_mutex_lock(&be->lock);
+ be->fd = fd;
+ be->owned = false;
+ qemu_mutex_unlock(&be->lock);
+ trace_iommu_backend_set_fd(be->fd);
+}
+
+static bool iommufd_backend_can_be_deleted(UserCreatable *uc)
+{
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(uc);
+
+ return !be->users;
+}
+
+static void iommufd_backend_class_init(ObjectClass *oc, void *data)
+{
+ UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
+
+ ucc->can_be_deleted = iommufd_backend_can_be_deleted;
+
+ object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd);
+}
+
+int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
+{
+ int fd, ret = 0;
+
+ qemu_mutex_lock(&be->lock);
+ if (be->users == UINT32_MAX) {
+ error_setg(errp, "too many connections");
+ ret = -E2BIG;
+ goto out;
+ }
+ if (be->owned && !be->users) {
+ fd = qemu_open_old("/dev/iommu", O_RDWR);
+ if (fd < 0) {
+ error_setg_errno(errp, errno, "/dev/iommu opening failed");
+ ret = fd;
+ goto out;
+ }
+ be->fd = fd;
+ }
+ be->users++;
+out:
+ trace_iommufd_backend_connect(be->fd, be->owned,
+ be->users, ret);
+ qemu_mutex_unlock(&be->lock);
+ return ret;
+}
+
+void iommufd_backend_disconnect(IOMMUFDBackend *be)
+{
+ qemu_mutex_lock(&be->lock);
+ if (!be->users) {
+ goto out;
+ }
+ be->users--;
+ if (!be->users && be->owned) {
+ close(be->fd);
+ be->fd = -1;
+ }
+out:
+ trace_iommufd_backend_disconnect(be->fd, be->users);
+ qemu_mutex_unlock(&be->lock);
+}
+
+int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
+ Error **errp)
+{
+ int ret, fd = be->fd;
+ struct iommu_ioas_alloc alloc_data = {
+ .size = sizeof(alloc_data),
+ .flags = 0,
+ };
+
+ ret = ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data);
+ if (ret) {
+ error_setg_errno(errp, errno, "Failed to allocate ioas");
+ return ret;
+ }
+
+ *ioas_id = alloc_data.out_ioas_id;
+ trace_iommufd_backend_alloc_ioas(fd, *ioas_id, ret);
+
+ return ret;
+}
+
+void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id)
+{
+ int ret, fd = be->fd;
+ struct iommu_destroy des = {
+ .size = sizeof(des),
+ .id = id,
+ };
+
+ ret = ioctl(fd, IOMMU_DESTROY, &des);
+ trace_iommufd_backend_free_id(fd, id, ret);
+ if (ret) {
+ error_report("Failed to free id: %u %m", id);
+ }
+}
+
+int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
+ ram_addr_t size, void *vaddr, bool readonly)
+{
+ int ret, fd = be->fd;
+ struct iommu_ioas_map map = {
+ .size = sizeof(map),
+ .flags = IOMMU_IOAS_MAP_READABLE |
+ IOMMU_IOAS_MAP_FIXED_IOVA,
+ .ioas_id = ioas_id,
+ .__reserved = 0,
+ .user_va = (uintptr_t)vaddr,
+ .iova = iova,
+ .length = size,
+ };
+
+ if (!readonly) {
+ map.flags |= IOMMU_IOAS_MAP_WRITEABLE;
+ }
+
+ ret = ioctl(fd, IOMMU_IOAS_MAP, &map);
+ trace_iommufd_backend_map_dma(fd, ioas_id, iova, size,
+ vaddr, readonly, ret);
+ if (ret) {
+ ret = -errno;
+
+ /* TODO: Not support mapping hardware PCI BAR region for now. */
+ if (errno == EFAULT) {
+ warn_report("IOMMU_IOAS_MAP failed: %m, PCI BAR?");
+ } else {
+ error_report("IOMMU_IOAS_MAP failed: %m");
+ }
+ }
+ return ret;
+}
+
+int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
+ hwaddr iova, ram_addr_t size)
+{
+ int ret, fd = be->fd;
+ struct iommu_ioas_unmap unmap = {
+ .size = sizeof(unmap),
+ .ioas_id = ioas_id,
+ .iova = iova,
+ .length = size,
+ };
+
+ ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap);
+ /*
+ * IOMMUFD treats a mapping as an object, so unmapping a
+ * nonexistent mapping is treated as deleting a nonexistent
+ * object and returns ENOENT. This differs from the legacy
+ * backend, which allows it. A vIOMMU may trigger many
+ * redundant unmaps; to avoid flooding the log, treat them
+ * as success for IOMMUFD, just like the legacy backend.
+ */
+ if (ret && errno == ENOENT) {
+ trace_iommufd_backend_unmap_dma_non_exist(fd, ioas_id, iova, size, ret);
+ ret = 0;
+ } else {
+ trace_iommufd_backend_unmap_dma(fd, ioas_id, iova, size, ret);
+ }
+
+ if (ret) {
+ ret = -errno;
+ error_report("IOMMU_IOAS_UNMAP failed: %m");
+ }
+ return ret;
+}
+
+static const TypeInfo iommufd_backend_info = {
+ .name = TYPE_IOMMUFD_BACKEND,
+ .parent = TYPE_OBJECT,
+ .instance_size = sizeof(IOMMUFDBackend),
+ .instance_init = iommufd_backend_init,
+ .instance_finalize = iommufd_backend_finalize,
+ .class_size = sizeof(IOMMUFDBackendClass),
+ .class_init = iommufd_backend_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_USER_CREATABLE },
+ { }
+ }
+};
+
+static void register_types(void)
+{
+ type_register_static(&iommufd_backend_info);
+}
+
+type_init(register_types);
diff --git a/backends/meson.build b/backends/meson.build
index 914c7c4afb..9a5cea480d 100644
--- a/backends/meson.build
+++ b/backends/meson.build
@@ -20,6 +20,7 @@ if have_vhost_user
system_ss.add(when: 'CONFIG_VIRTIO', if_true: files('vhost-user.c'))
endif
system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c'))
+system_ss.add(when: 'CONFIG_IOMMUFD', if_true: files('iommufd.c'))
if have_vhost_user_crypto
system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c'))
endif
diff --git a/backends/trace-events b/backends/trace-events
index 652eb76a57..d45c6e31a6 100644
--- a/backends/trace-events
+++ b/backends/trace-events
@@ -5,3 +5,13 @@ dbus_vmstate_pre_save(void)
dbus_vmstate_post_load(int version_id) "version_id: %d"
dbus_vmstate_loading(const char *id) "id: %s"
dbus_vmstate_saving(const char *id) "id: %s"
+
+# iommufd.c
+iommufd_backend_connect(int fd, bool owned, uint32_t users, int ret) "fd=%d owned=%d users=%d (%d)"
+iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d"
+iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d"
+iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)"
+iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
+iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
+iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)"
+iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)"
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
new file mode 100644
index 0000000000..9c5524b0ed
--- /dev/null
+++ b/include/sysemu/iommufd.h
@@ -0,0 +1,38 @@
+#ifndef SYSEMU_IOMMUFD_H
+#define SYSEMU_IOMMUFD_H
+
+#include "qom/object.h"
+#include "qemu/thread.h"
+#include "exec/hwaddr.h"
+#include "exec/cpu-common.h"
+
+#define TYPE_IOMMUFD_BACKEND "iommufd"
+OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND)
+
+struct IOMMUFDBackendClass {
+ ObjectClass parent_class;
+};
+
+struct IOMMUFDBackend {
+ Object parent;
+
+ /*< protected >*/
+ int fd; /* /dev/iommu file descriptor */
+ bool owned; /* is the /dev/iommu opened internally */
+ QemuMutex lock;
+ uint32_t users;
+
+ /*< public >*/
+};
+
+int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp);
+void iommufd_backend_disconnect(IOMMUFDBackend *be);
+
+int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
+ Error **errp);
+void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id);
+int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
+ ram_addr_t size, void *vaddr, bool readonly);
+int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
+ hwaddr iova, ram_addr_t size);
+#endif
diff --git a/qapi/qom.json b/qapi/qom.json
index a74c7a91f9..a5336e6b11 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -794,6 +794,23 @@
{ 'struct': 'VfioUserServerProperties',
'data': { 'socket': 'SocketAddress', 'device': 'str' } }
+##
+# @IOMMUFDProperties:
+#
+# Properties for iommufd objects.
+#
+# @fd: file descriptor name previously passed via 'getfd' command,
+# which represents a pre-opened /dev/iommu. This allows the
+# iommufd object to be shared across several subsystems
+# (VFIO, VDPA, ...), and the file descriptor to be shared
+# with other processes, e.g. DPDK. (default: QEMU opens
+# /dev/iommu by itself)
+#
+# Since: 9.0
+##
+{ 'struct': 'IOMMUFDProperties',
+ 'data': { '*fd': 'str' } }
+
##
# @RngProperties:
#
@@ -969,6 +986,7 @@
'input-barrier',
{ 'name': 'input-linux',
'if': 'CONFIG_LINUX' },
+ 'iommufd',
'iothread',
'main-loop',
{ 'name': 'memory-backend-epc',
@@ -1039,6 +1057,7 @@
'input-barrier': 'InputBarrierProperties',
'input-linux': { 'type': 'InputLinuxProperties',
'if': 'CONFIG_LINUX' },
+ 'iommufd': 'IOMMUFDProperties',
'iothread': 'IothreadProperties',
'main-loop': 'MainLoopProperties',
'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties',
diff --git a/qemu-options.hx b/qemu-options.hx
index 8516b73206..7fe76c4b1d 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -5224,6 +5224,18 @@ SRST
The ``share`` boolean option is on by default with memfd.
+ ``-object iommufd,id=id[,fd=fd]``
+ Creates an iommufd backend which allows control of DMA mapping
+ through the ``/dev/iommu`` device.
+
+ The ``id`` parameter is a unique ID which frontends (such as
+ vfio-pci or vdpa) will use to connect with the iommufd backend.
+
+ The ``fd`` parameter is an optional pre-opened file descriptor
+ resulting from ``/dev/iommu`` opening. Usually the iommufd is shared
+ across all subsystems, bringing the benefit of centralized
+ reference counting.
+
``-object rng-builtin,id=id``
Creates a random number generator backend which obtains entropy
from QEMU builtin functions. The ``id`` parameter is a unique ID
--
2.41.0.windows.1

View File

@ -0,0 +1,52 @@
From ca3f4fd234ea4b8f02a415b99b449e71d028c076 Mon Sep 17 00:00:00 2001
From: qihao_yewu <qihao_yewu@cmss.chinamobile.com>
Date: Tue, 8 Apr 2025 07:27:47 -0400
Subject: [PATCH] cryptodev: Fix error handling in
cryptodev_lkcf_execute_task()
cherry-pick from 1c89dfefc4c33295126208225f202f39b5a234c3
When cryptodev_lkcf_set_op_desc() fails, we report an error, but
continue anyway. This is wrong. We then pass a non-null @local_error
to various functions, which could easily fail error_setv()'s assertion
on failure.
Fail the function instead.
When qcrypto_akcipher_new() fails, we fail the function without
reporting the error. This leaks the Error object.
Add the missing error reporting. This also frees the Error object.
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-ID: <20250312101131.1615777-1-armbru@redhat.com>
Reviewed-by: zhenwei pi <pizhenwei@bytedance.com>
Signed-off-by: qihao_yewu <qihao_yewu@cmss.chinamobile.com>
---
backends/cryptodev-lkcf.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/backends/cryptodev-lkcf.c b/backends/cryptodev-lkcf.c
index 45aba1ff67..45b287a953 100644
--- a/backends/cryptodev-lkcf.c
+++ b/backends/cryptodev-lkcf.c
@@ -330,6 +330,8 @@ static void cryptodev_lkcf_execute_task(CryptoDevLKCFTask *task)
cryptodev_lkcf_set_op_desc(&session->akcipher_opts, op_desc,
sizeof(op_desc), &local_error) != 0) {
error_report_err(local_error);
+ status = -VIRTIO_CRYPTO_ERR;
+ goto out;
} else {
key_id = add_key(KCTL_KEY_TYPE_PKEY, "lkcf-backend-priv-key",
p8info, p8info_len, KCTL_KEY_RING);
@@ -346,6 +348,7 @@ static void cryptodev_lkcf_execute_task(CryptoDevLKCFTask *task)
session->key, session->keylen,
&local_error);
if (!akcipher) {
+ error_report_err(local_error);
status = -VIRTIO_CRYPTO_ERR;
goto out;
}
--
2.41.0.windows.1

View File

@ -0,0 +1,220 @@
From fd1d6d64803a052adcab8c7993ca40cabc9c926d Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
Date: Sat, 11 Jan 2025 10:53:03 +0800
Subject: [PATCH] docs/devel: Add VFIO iommufd backend documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Suggested-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Zhou Wang <wangzhou1@hisilicon.com>
---
MAINTAINERS | 1 +
docs/devel/index-internals.rst | 1 +
docs/devel/vfio-iommufd.rst | 166 +++++++++++++++++++++++++++++++++
3 files changed, 168 insertions(+)
create mode 100644 docs/devel/vfio-iommufd.rst
diff --git a/MAINTAINERS b/MAINTAINERS
index ca70bb4e64..0ddb20a35f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2176,6 +2176,7 @@ F: backends/iommufd.c
F: include/sysemu/iommufd.h
F: include/qemu/chardev_open.h
F: util/chardev_open.c
+F: docs/devel/vfio-iommufd.rst
vhost
M: Michael S. Tsirkin <mst@redhat.com>
diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst
index 6f81df92bc..3def4a138b 100644
--- a/docs/devel/index-internals.rst
+++ b/docs/devel/index-internals.rst
@@ -18,5 +18,6 @@ Details about QEMU's various subsystems including how to add features to them.
s390-dasd-ipl
tracing
vfio-migration
+ vfio-iommufd
writing-monitor-commands
virtio-backends
diff --git a/docs/devel/vfio-iommufd.rst b/docs/devel/vfio-iommufd.rst
new file mode 100644
index 0000000000..3d1c11f175
--- /dev/null
+++ b/docs/devel/vfio-iommufd.rst
@@ -0,0 +1,166 @@
+===============================
+IOMMUFD BACKEND usage with VFIO
+===============================
+
+(Same meaning for backend/container/BE)
+
+With the introduction of iommufd, the Linux kernel provides a generic
+interface for user space drivers to propagate their DMA mappings to kernel
+for assigned devices. While the legacy kernel interface is group-centric,
+the new iommufd interface is device-centric, relying on device fd and iommufd.
+
+To support both interfaces in the QEMU VFIO device, a base container is
+introduced to abstract the common parts of the VFIO legacy and iommufd
+containers, so that the generic VFIO code can use either container.
+
+The base container implements generic functions such as memory_listener and
+address space management whereas the derived container implements callbacks
+specific to either legacy or iommufd. Each container has its own way to set up
+the secure context and the DMA management interface. The diagram below shows
+how this looks with both containers.
+
+::
+
+ VFIO AddressSpace/Memory
+ +-------+ +----------+ +-----+ +-----+
+ | pci | | platform | | ap | | ccw |
+ +---+---+ +----+-----+ +--+--+ +--+--+ +----------------------+
+ | | | | | AddressSpace |
+ | | | | +------------+---------+
+ +---V-----------V-----------V--------V----+ /
+ | VFIOAddressSpace | <------------+
+ | | | MemoryListener
+ | VFIOContainerBase list |
+ +-------+----------------------------+----+
+ | |
+ | |
+ +-------V------+ +--------V----------+
+ | iommufd | | vfio legacy |
+ | container | | container |
+ +-------+------+ +--------+----------+
+ | |
+ | /dev/iommu | /dev/vfio/vfio
+ | /dev/vfio/devices/vfioX | /dev/vfio/$group_id
+ Userspace | |
+ ============+============================+===========================
+ Kernel | device fd |
+ +---------------+ | group/container fd
+ | (BIND_IOMMUFD | | (SET_CONTAINER/SET_IOMMU)
+ | ATTACH_IOAS) | | device fd
+ | | |
+ | +-------V------------V-----------------+
+ iommufd | | vfio |
+ (map/unmap | +---------+--------------------+-------+
+ ioas_copy) | | | map/unmap
+ | | |
+ +------V------+ +-----V------+ +------V--------+
+ | iommfd core | | device | | vfio iommu |
+ +-------------+ +------------+ +---------------+
+
+* Secure Context setup
+
+ - iommufd BE: uses device fd and iommufd to setup secure context
+ (bind_iommufd, attach_ioas)
+ - vfio legacy BE: uses group fd and container fd to setup secure context
+ (set_container, set_iommu)
+
+* Device access
+
+ - iommufd BE: device fd is opened through ``/dev/vfio/devices/vfioX``
+ - vfio legacy BE: device fd is retrieved from group fd ioctl
+
+* DMA Mapping flow
+
+ 1. VFIOAddressSpace receives MemoryRegion add/del via MemoryListener
+ 2. VFIO populates DMA map/unmap via the container BEs
+ * iommufd BE: uses iommufd
+ * vfio legacy BE: uses container fd
+
+Example configuration
+=====================
+
+Step 1: configure the host device
+---------------------------------
+
+It is exactly the same as for a VFIO device with the legacy VFIO container.
+
+Step 2: configure QEMU
+----------------------
+
+Interactions with the ``/dev/iommu`` are abstracted by a new iommufd
+object (compiled in with the ``CONFIG_IOMMUFD`` option).
+
+Any QEMU device (e.g. VFIO device) wishing to use ``/dev/iommu`` must
+be linked with an iommufd object. It gets a new optional property
+named ``iommufd`` which allows passing an iommufd object. Take ``vfio-pci``
+device for example:
+
+.. code-block:: bash
+
+ -object iommufd,id=iommufd0
+ -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0
+
+Note that ``/dev/iommu`` and the VFIO cdev can be opened externally by a
+management layer. In that case the fd is passed in; the ``fd`` property
+accepts either a string naming the fd or a number, for example:
+
+.. code-block:: bash
+
+ -object iommufd,id=iommufd0,fd=22
+ -device vfio-pci,iommufd=iommufd0,fd=23
+
+If the ``fd`` property is not passed, the fd is opened by QEMU.
+
+If no ``iommufd`` object is passed to the ``vfio-pci`` device, iommufd
+is not used and the user gets the behavior based on the legacy VFIO
+container:
+
+.. code-block:: bash
+
+ -device vfio-pci,host=0000:02:00.0
+
+Supported platform
+==================
+
+Supports x86, ARM and s390x currently.
+
+Caveats
+=======
+
+Dirty page sync
+---------------
+
+Dirty page sync with the iommufd backend is not supported yet, so live migration
+is disabled by default. It can be force-enabled as shown below, although it is
+inefficient.
+
+.. code-block:: bash
+
+ -object iommufd,id=iommufd0
+ -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0,enable-migration=on
+
+P2P DMA
+-------
+
+PCI p2p DMA is unsupported as IOMMUFD doesn't support mapping hardware PCI
+BAR regions yet. The warning below is shown for assigned PCI devices; it is not a bug.
+
+.. code-block:: none
+
+ qemu-system-x86_64: warning: IOMMU_IOAS_MAP failed: Bad address, PCI BAR?
+ qemu-system-x86_64: vfio_container_dma_map(0x560cb6cb1620, 0xe000000021000, 0x3000, 0x7f32ed55c000) = -14 (Bad address)
+
+FD passing with mdev
+--------------------
+
+The ``vfio-pci`` device checks the sysfsdev property to decide if the backend is
+an mdev. If FD passing is used, there is no way to know that and the mdev is
+treated like a real PCI device. The error below is reported if the user wants to
+enable RAM discarding for the mdev.
+
+.. code-block:: none
+
+ qemu-system-x86_64: -device vfio-pci,iommufd=iommufd0,x-balloon-allowed=on,fd=9: vfio VFIO_FD9: x-balloon-allowed only potentially compatible with mdev devices
+
+``vfio-ap`` and ``vfio-ccw`` devices don't have the same issue, as their backend
+devices are always mdev and RAM discarding is force-enabled.
--
2.41.0.windows.1

View File

@ -0,0 +1,57 @@
From b1087bb8a4edbacc7240c0fcab63bc1cf2624627 Mon Sep 17 00:00:00 2001
From: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Date: Tue, 21 Jan 2025 14:42:45 +0000
Subject: [PATCH] gpex-acpi: Remove duplicate DSM #5
It looks like acpi_dsdt_add_pci_osc() already builds the _DSM
for virt/gpex case, and we don't need to add duplicate DSM methods
for _DSM #5 case.
And the acpi_dsdt_add_pci_osc() already adds _DSM #5 when
preserve_config is true.
This is to get rid of the ACPI related error messages during boot:
ACPI BIOS Error (bug): Failure creating named object [\_SB.PC08._DSM], AE_ALREADY_EXISTS
ACPI BIOS Error (bug): \_SB.PC08.PCI0._DSM: Excess arguments - ASL declared 5, ACPI requires 4
ToDo: Only sanity tested.
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
hw/pci-host/gpex-acpi.c | 12 ------------
1 file changed, 12 deletions(-)
diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c
index ce424fc9da..162f6221ab 100644
--- a/hw/pci-host/gpex-acpi.c
+++ b/hw/pci-host/gpex-acpi.c
@@ -189,12 +189,6 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg)
aml_append(dev, aml_name_decl("_PXM", aml_int(numa_node)));
}
- if (cfg->preserve_config) {
- method = aml_method("_DSM", 5, AML_SERIALIZED);
- aml_append(method, aml_return(aml_int(0)));
- aml_append(dev, method);
- }
-
acpi_dsdt_add_pci_route_table(dev, cfg->irq);
/*
@@ -226,12 +220,6 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg)
aml_append(dev, aml_name_decl("_STR", aml_unicode("PCIe 0 Device")));
aml_append(dev, aml_name_decl("_CCA", aml_int(1)));
- if (cfg->preserve_config) {
- method = aml_method("_DSM", 5, AML_SERIALIZED);
- aml_append(method, aml_return(aml_int(0)));
- aml_append(dev, method);
- }
-
acpi_dsdt_add_pci_route_table(dev, cfg->irq);
method = aml_method("_CBA", 0, AML_NOTSERIALIZED);
--
2.41.0.windows.1

View File

@ -0,0 +1,34 @@
From bcb031b40fe40d5b6347b2134fb039945b87e8a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
Date: Sat, 11 Jan 2025 10:52:55 +0800
Subject: [PATCH] hw/arm: Activate IOMMUFD for virt machines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Zhou Wang <wangzhou1@hisilicon.com>
---
hw/arm/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index c0a7d0bd58..4a0ea0628f 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -8,6 +8,7 @@ config ARM_VIRT
imply TPM_TIS_SYSBUS
imply TPM_TIS_I2C
imply NVDIMM
+ imply IOMMUFD
select ARM_GIC
select ACPI
select ARM_SMMUV3
--
2.41.0.windows.1

View File

@ -0,0 +1,67 @@
From d589010512005bfc698f30417911e4b14478c81b Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Wed, 22 Jun 2022 01:30:39 -0700
Subject: [PATCH] hw/arm/smmu-common: Add a nested flag to SMMUState
Add a nested flag in the SMMUState, passed in from device property.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
hw/arm/smmu-common.c | 1 +
hw/arm/smmuv3.c | 5 +++++
include/hw/arm/smmu-common.h | 4 ++++
3 files changed, 10 insertions(+)
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 9a8ac45431..c5f3e02065 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -683,6 +683,7 @@ static Property smmu_dev_properties[] = {
DEFINE_PROP_UINT8("bus_num", SMMUState, bus_num, 0),
DEFINE_PROP_LINK("primary-bus", SMMUState, primary_bus,
TYPE_PCI_BUS, PCIBus *),
+ DEFINE_PROP_BOOL("nested", SMMUState, nested, false),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index c3871ae067..64ca4c5542 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1746,6 +1746,11 @@ static void smmu_realize(DeviceState *d, Error **errp)
SysBusDevice *dev = SYS_BUS_DEVICE(d);
Error *local_err = NULL;
+ if (s->stage && strcmp("1", s->stage)) {
+ /* Nesting is only supported with a stage-1-only vSMMU */
+ sys->nested = false;
+ }
+
c->parent_realize(d, &local_err);
if (local_err) {
error_propagate(errp, local_err);
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
index fd8d772da1..eae5d4d05b 100644
--- a/include/hw/arm/smmu-common.h
+++ b/include/hw/arm/smmu-common.h
@@ -22,6 +22,7 @@
#include "hw/sysbus.h"
#include "hw/pci/pci.h"
#include "qom/object.h"
+#include "sysemu/iommufd.h"
#define SMMU_PCI_BUS_MAX 256
#define SMMU_PCI_DEVFN_MAX 256
@@ -136,6 +137,9 @@ struct SMMUState {
const char *mrtypename;
MemoryRegion iomem;
+ /* Nested SMMU */
+ bool nested;
+
GHashTable *smmu_pcibus_by_busptr;
GHashTable *configs; /* cache for configuration data */
GHashTable *iotlb;
--
2.41.0.windows.1

View File

@ -0,0 +1,179 @@
From a2735cd15160a62065a0a0b39af405c7b0f3fae8 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Wed, 22 Jun 2022 14:41:27 -0700
Subject: [PATCH] hw/arm/smmu-common: Add iommufd helpers
Add a set of helper functions for IOMMUFD and new "struct SMMUS1Hwpt"
to store the nested hwpt information.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
hw/arm/smmu-common.c | 108 +++++++++++++++++++++++++++++++++++
include/hw/arm/smmu-common.h | 20 +++++++
2 files changed, 128 insertions(+)
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 038ae857d8..a79eb34277 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -838,6 +838,114 @@ IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid)
return NULL;
}
+/* IOMMUFD helpers */
+int smmu_dev_get_info(SMMUDevice *sdev, uint32_t *data_type,
+ uint32_t data_len, void *data)
+{
+ uint64_t caps;
+
+ if (!sdev || !sdev->idev) {
+ return -ENOENT;
+ }
+
+ return !iommufd_backend_get_device_info(sdev->idev->iommufd,
+ sdev->idev->devid, data_type, data,
+ data_len, &caps, NULL);
+}
+
+void smmu_dev_uninstall_nested_ste(SMMUDevice *sdev, bool abort)
+{
+ HostIOMMUDeviceIOMMUFD *idev = sdev->idev;
+ SMMUS1Hwpt *s1_hwpt = sdev->s1_hwpt;
+ uint32_t hwpt_id;
+
+ if (!s1_hwpt || !sdev->viommu) {
+ return;
+ }
+
+ if (abort) {
+ hwpt_id = sdev->viommu->abort_hwpt_id;
+ } else {
+ hwpt_id = sdev->viommu->bypass_hwpt_id;
+ }
+
+ if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, NULL)) {
+ return;
+ }
+
+ iommufd_backend_free_id(idev->iommufd, s1_hwpt->hwpt_id);
+ sdev->s1_hwpt = NULL;
+ g_free(s1_hwpt);
+}
+
+int smmu_dev_install_nested_ste(SMMUDevice *sdev, uint32_t data_type,
+ uint32_t data_len, void *data)
+{
+ SMMUViommu *viommu = sdev->viommu;
+ SMMUS1Hwpt *s1_hwpt = sdev->s1_hwpt;
+ HostIOMMUDeviceIOMMUFD *idev = sdev->idev;
+
+ if (!idev || !viommu) {
+ return -ENOENT;
+ }
+
+ if (s1_hwpt) {
+ smmu_dev_uninstall_nested_ste(sdev, false);
+ }
+
+ s1_hwpt = g_new0(SMMUS1Hwpt, 1);
+ if (!s1_hwpt) {
+ return -ENOMEM;
+ }
+
+ s1_hwpt->smmu = sdev->smmu;
+ s1_hwpt->viommu = viommu;
+ s1_hwpt->iommufd = idev->iommufd;
+
+ if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
+ viommu->core->viommu_id, 0, data_type,
+ data_len, data, &s1_hwpt->hwpt_id, NULL)) {
+ goto free;
+ }
+
+ if (!host_iommu_device_iommufd_attach_hwpt(idev, s1_hwpt->hwpt_id, NULL)) {
+ goto free_hwpt;
+ }
+
+ sdev->s1_hwpt = s1_hwpt;
+
+ return 0;
+free_hwpt:
+ iommufd_backend_free_id(idev->iommufd, s1_hwpt->hwpt_id);
+free:
+ sdev->s1_hwpt = NULL;
+ g_free(s1_hwpt);
+
+ return -EINVAL;
+}
+
+int smmu_hwpt_invalidate_cache(SMMUS1Hwpt *s1_hwpt, uint32_t type, uint32_t len,
+ uint32_t *num, void *reqs)
+{
+ if (!s1_hwpt) {
+ return -ENOENT;
+ }
+
+ return iommufd_backend_invalidate_cache(s1_hwpt->iommufd, s1_hwpt->hwpt_id,
+ type, len, num, reqs);
+}
+
+int smmu_viommu_invalidate_cache(IOMMUFDViommu *viommu, uint32_t type,
+ uint32_t len, uint32_t *num, void *reqs)
+{
+ if (!viommu) {
+ return -ENOENT;
+ }
+
+ return iommufd_viommu_invalidate_cache(viommu->iommufd, viommu->viommu_id,
+ type, len, num, reqs);
+}
+
/* Unmap all notifiers attached to @mr */
static void smmu_inv_notifiers_mr(IOMMUMemoryRegion *mr)
{
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
index 3bfb68cef6..66dc7206ea 100644
--- a/include/hw/arm/smmu-common.h
+++ b/include/hw/arm/smmu-common.h
@@ -125,6 +125,15 @@ typedef struct SMMUViommu {
QLIST_ENTRY(SMMUViommu) next;
} SMMUViommu;
+typedef struct SMMUS1Hwpt {
+ void *smmu;
+ IOMMUFDBackend *iommufd;
+ SMMUViommu *viommu;
+ uint32_t hwpt_id;
+ QLIST_HEAD(, SMMUDevice) device_list;
+ QLIST_ENTRY(SMMUViommu) next;
+} SMMUS1Hwpt;
+
typedef struct SMMUDevice {
void *smmu;
PCIBus *bus;
@@ -132,6 +141,7 @@ typedef struct SMMUDevice {
IOMMUMemoryRegion iommu;
HostIOMMUDeviceIOMMUFD *idev;
SMMUViommu *viommu;
+ SMMUS1Hwpt *s1_hwpt;
AddressSpace as;
uint32_t cfg_cache_hits;
uint32_t cfg_cache_misses;
@@ -225,4 +235,14 @@ void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova,
/* Unmap the range of all the notifiers registered to any IOMMU mr */
void smmu_inv_notifiers_all(SMMUState *s);
+/* IOMMUFD helpers */
+int smmu_dev_get_info(SMMUDevice *sdev, uint32_t *data_type,
+ uint32_t data_len, void *data);
+void smmu_dev_uninstall_nested_ste(SMMUDevice *sdev, bool abort);
+int smmu_dev_install_nested_ste(SMMUDevice *sdev, uint32_t data_type,
+ uint32_t data_len, void *data);
+int smmu_hwpt_invalidate_cache(SMMUS1Hwpt *s1_hwpt, uint32_t type, uint32_t len,
+ uint32_t *num, void *reqs);
+int smmu_viommu_invalidate_cache(IOMMUFDViommu *viommu, uint32_t type,
+ uint32_t len, uint32_t *num, void *reqs);
#endif /* HW_ARM_SMMU_COMMON_H */
--
2.41.0.windows.1

View File

@ -0,0 +1,283 @@
From 539e12641dc2db30a6fea7a0f061e163bc245d79 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Wed, 22 Jun 2022 02:16:52 -0700
Subject: [PATCH] hw/arm/smmu-common: Add set/unset_iommu_device callback
Implement a set_iommu_device callback:
- Find an existing S2 hwpt to test attach() or allocate a new one
(Devices behind the same physical SMMU should share an S2 HWPT.)
- Attach the device to the S2 hwpt and add it to its device list
And add an unset_iommu_device doing the opposite cleanup routine.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
hw/arm/smmu-common.c | 177 +++++++++++++++++++++++++++++++++++
hw/arm/trace-events | 2 +
include/hw/arm/smmu-common.h | 21 +++++
3 files changed, 200 insertions(+)
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 03d9ff58d4..038ae857d8 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -20,6 +20,7 @@
#include "trace.h"
#include "exec/target_page.h"
#include "hw/core/cpu.h"
+#include "hw/pci/pci_device.h"
#include "hw/qdev-properties.h"
#include "qapi/error.h"
#include "qemu/jhash.h"
@@ -639,8 +640,184 @@ static AddressSpace *smmu_find_add_as(PCIBus *bus, void *opaque, int devfn)
return &sdev->as;
}
+static bool smmu_dev_attach_viommu(SMMUDevice *sdev,
+ HostIOMMUDeviceIOMMUFD *idev, Error **errp)
+{
+ struct iommu_hwpt_arm_smmuv3 bypass_data = {
+ .ste = { 0x9ULL, 0x0ULL }, //0x1ULL << (108 - 64) },
+ };
+ struct iommu_hwpt_arm_smmuv3 abort_data = {
+ .ste = { 0x1ULL, 0x0ULL },
+ };
+ SMMUState *s = sdev->smmu;
+ SMMUS2Hwpt *s2_hwpt;
+ SMMUViommu *viommu;
+ uint32_t s2_hwpt_id;
+
+ if (s->viommu) {
+ return host_iommu_device_iommufd_attach_hwpt(
+ idev, s->viommu->s2_hwpt->hwpt_id, errp);
+ }
+
+ if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid, idev->ioas_id,
+ IOMMU_HWPT_ALLOC_NEST_PARENT,
+ IOMMU_HWPT_DATA_NONE, 0, NULL,
+ &s2_hwpt_id, errp)) {
+ error_setg(errp, "failed to allocate an S2 hwpt");
+ return false;
+ }
+
+ /* Attach to S2 for MSI cookie */
+ if (!host_iommu_device_iommufd_attach_hwpt(idev, s2_hwpt_id, errp)) {
+ error_setg(errp, "failed to attach stage-2 HW pagetable");
+ goto free_s2_hwpt;
+ }
+
+ viommu = g_new0(SMMUViommu, 1);
+
+ viommu->core = iommufd_backend_alloc_viommu(idev->iommufd, idev->devid,
+ IOMMU_VIOMMU_TYPE_ARM_SMMUV3,
+ s2_hwpt_id);
+ if (!viommu->core) {
+ error_setg(errp, "failed to allocate a viommu");
+ goto free_viommu;
+ }
+
+ if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
+ viommu->core->viommu_id, 0,
+ IOMMU_HWPT_DATA_ARM_SMMUV3,
+ sizeof(abort_data), &abort_data,
+ &viommu->abort_hwpt_id, errp)) {
+ error_setg(errp, "failed to allocate an abort pagetable");
+ goto free_viommu_core;
+ }
+
+ if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
+ viommu->core->viommu_id, 0,
+ IOMMU_HWPT_DATA_ARM_SMMUV3,
+ sizeof(bypass_data), &bypass_data,
+ &viommu->bypass_hwpt_id, errp)) {
+ error_setg(errp, "failed to allocate a bypass pagetable");
+ goto free_abort_hwpt;
+ }
+
+ if (!host_iommu_device_iommufd_attach_hwpt(
+ idev, viommu->bypass_hwpt_id, errp)) {
+ error_setg(errp, "failed to attach the bypass pagetable");
+ goto free_bypass_hwpt;
+ }
+
+ s2_hwpt = g_new0(SMMUS2Hwpt, 1);
+ s2_hwpt->iommufd = idev->iommufd;
+ s2_hwpt->hwpt_id = s2_hwpt_id;
+ s2_hwpt->ioas_id = idev->ioas_id;
+
+ viommu->iommufd = idev->iommufd;
+ viommu->s2_hwpt = s2_hwpt;
+
+ s->viommu = viommu;
+ return true;
+
+free_bypass_hwpt:
+ iommufd_backend_free_id(idev->iommufd, viommu->bypass_hwpt_id);
+free_abort_hwpt:
+ iommufd_backend_free_id(idev->iommufd, viommu->abort_hwpt_id);
+free_viommu_core:
+ iommufd_backend_free_id(idev->iommufd, viommu->core->viommu_id);
+ g_free(viommu->core);
+free_viommu:
+ g_free(viommu);
+ host_iommu_device_iommufd_attach_hwpt(idev, sdev->idev->ioas_id, errp);
+free_s2_hwpt:
+ iommufd_backend_free_id(idev->iommufd, s2_hwpt_id);
+ return false;
+}
+
+static bool smmu_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
+ HostIOMMUDevice *hiod, Error **errp)
+{
+ HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod);
+ SMMUState *s = opaque;
+ SMMUPciBus *sbus = smmu_get_sbus(s, bus);
+ SMMUDevice *sdev = smmu_get_sdev(s, sbus, bus, devfn);
+
+ if (!s->nested) {
+ return true;
+ }
+
+ if (sdev->idev) {
+ if (sdev->idev != idev) {
+ return false;//-EEXIST;
+ } else {
+ return true;
+ }
+ }
+
+ if (!idev) {
+ return true;
+ }
+
+ if (!smmu_dev_attach_viommu(sdev, idev, errp)) {
+ error_report("Unable to attach viommu");
+ return false;
+ }
+
+ sdev->idev = idev;
+ sdev->viommu = s->viommu;
+ QLIST_INSERT_HEAD(&s->viommu->device_list, sdev, next);
+ trace_smmu_set_iommu_device(devfn, smmu_get_sid(sdev));
+
+ return true;
+}
+
+static void smmu_dev_unset_iommu_device(PCIBus *bus, void *opaque, int devfn)
+{
+ SMMUDevice *sdev;
+ SMMUViommu *viommu;
+ SMMUState *s = opaque;
+ SMMUPciBus *sbus = g_hash_table_lookup(s->smmu_pcibus_by_busptr, bus);
+
+ if (!s->nested) {
+ return;
+ }
+
+ if (!sbus) {
+ return;
+ }
+
+ sdev = sbus->pbdev[devfn];
+ if (!sdev) {
+ return;
+ }
+
+ if (!host_iommu_device_iommufd_attach_hwpt(sdev->idev,
+ sdev->idev->ioas_id, NULL)) {
+ error_report("Unable to attach dev to the default HW pagetable");
+ }
+
+ viommu = sdev->viommu;
+
+ sdev->idev = NULL;
+ sdev->viommu = NULL;
+ QLIST_REMOVE(sdev, next);
+ trace_smmu_unset_iommu_device(devfn, smmu_get_sid(sdev));
+
+ if (QLIST_EMPTY(&viommu->device_list)) {
+ iommufd_backend_free_id(viommu->iommufd, viommu->bypass_hwpt_id);
+ iommufd_backend_free_id(viommu->iommufd, viommu->abort_hwpt_id);
+ iommufd_backend_free_id(viommu->iommufd, viommu->core->viommu_id);
+ g_free(viommu->core);
+ iommufd_backend_free_id(viommu->iommufd, viommu->s2_hwpt->hwpt_id);
+ g_free(viommu->s2_hwpt);
+ g_free(viommu);
+ s->viommu = NULL;
+ }
+}
+
static const PCIIOMMUOps smmu_ops = {
.get_address_space = smmu_find_add_as,
+ .set_iommu_device = smmu_dev_set_iommu_device,
+ .unset_iommu_device = smmu_dev_unset_iommu_device,
};
IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid)
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index cdc1ea06a8..58e0636e95 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -5,6 +5,8 @@ virt_acpi_setup(void) "No fw cfg or ACPI disabled. Bailing out."
# smmu-common.c
smmu_add_mr(const char *name) "%s"
+smmu_set_iommu_device(int devfn, uint32_t sid) "devfn=%d (sid=%d)"
+smmu_unset_iommu_device(int devfn, uint32_t sid) "devfn=%d (sid=%d)"
smmu_ptw_level(int stage, int level, uint64_t iova, size_t subpage_size, uint64_t baseaddr, uint32_t offset, uint64_t pte) "stage=%d level=%d iova=0x%"PRIx64" subpage_sz=0x%zx baseaddr=0x%"PRIx64" offset=%d => pte=0x%"PRIx64
smmu_ptw_invalid_pte(int stage, int level, uint64_t baseaddr, uint64_t pteaddr, uint32_t offset, uint64_t pte) "stage=%d level=%d base@=0x%"PRIx64" pte@=0x%"PRIx64" offset=%d pte=0x%"PRIx64
smmu_ptw_page_pte(int stage, int level, uint64_t iova, uint64_t baseaddr, uint64_t pteaddr, uint64_t pte, uint64_t address) "stage=%d level=%d iova=0x%"PRIx64" base@=0x%"PRIx64" pte@=0x%"PRIx64" pte=0x%"PRIx64" page address = 0x%"PRIx64
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
index eae5d4d05b..3bfb68cef6 100644
--- a/include/hw/arm/smmu-common.h
+++ b/include/hw/arm/smmu-common.h
@@ -23,6 +23,7 @@
#include "hw/pci/pci.h"
#include "qom/object.h"
#include "sysemu/iommufd.h"
+#include <linux/iommufd.h>
#define SMMU_PCI_BUS_MAX 256
#define SMMU_PCI_DEVFN_MAX 256
@@ -107,11 +108,30 @@ typedef struct SMMUTransCfg {
struct SMMUS2Cfg s2cfg;
} SMMUTransCfg;
+typedef struct SMMUS2Hwpt {
+ IOMMUFDBackend *iommufd;
+ uint32_t hwpt_id;
+ uint32_t ioas_id;
+} SMMUS2Hwpt;
+
+typedef struct SMMUViommu {
+ void *smmu;
+ IOMMUFDBackend *iommufd;
+ IOMMUFDViommu *core;
+ SMMUS2Hwpt *s2_hwpt;
+ uint32_t bypass_hwpt_id;
+ uint32_t abort_hwpt_id;
+ QLIST_HEAD(, SMMUDevice) device_list;
+ QLIST_ENTRY(SMMUViommu) next;
+} SMMUViommu;
+
typedef struct SMMUDevice {
void *smmu;
PCIBus *bus;
int devfn;
IOMMUMemoryRegion iommu;
+ HostIOMMUDeviceIOMMUFD *idev;
+ SMMUViommu *viommu;
AddressSpace as;
uint32_t cfg_cache_hits;
uint32_t cfg_cache_misses;
@@ -139,6 +159,7 @@ struct SMMUState {
/* Nested SMMU */
bool nested;
+ SMMUViommu *viommu;
GHashTable *smmu_pcibus_by_busptr;
GHashTable *configs; /* cache for configuration data */
--
2.41.0.windows.1

View File

@ -0,0 +1,75 @@
From 6c330f39cc08e4c641a3567e2b6ad0ebcadf5165 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Fri, 21 Jun 2024 21:22:04 +0000
Subject: [PATCH] hw/arm/smmu-common: Bypass emulated IOTLB for a nested SMMU
If a vSMMU is configured as a nested one, HW IOTLB will be used and all
cache invalidation should be done to the HW IOTLB too, v.s. the emulated
iotlb. In this case, an iommu notifier isn't registered, as the devices
behind a nested SMMU would stay in the system address space for stage-2
mappings.
However, the KVM code still requests an iommu address space to translate
an MSI doorbell gIOVA via get_msi_address_space() and translate().
Since a nested SMMU doesn't register an iommu notifier to flush emulated
iotlb, bypass the emulated IOTLB and always walk through the guest-level
IO page table.
Note that regular nested SMMU could still register an iommu notifier for
IOTLB invalidation, since QEMU traps the invalidation commands. But this
would result in invalidation inefficiency since each invalidation would
be doubled for both HW IOTLB and the emulated IOTLB. Also, with NVIDIA's
CMDQV feature on its Grace SoC, invalidation commands are issued to the
CMDQ HW directly, without any trapping. So, there is no way to maintain
the emulated IOTLB. Meanwhile, the stage-1 translation request from KVM
is only activated in case of an MSI table update, which does not happen
that often to impact performance if walking through the guest RAM every
time.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
hw/arm/smmu-common.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index c5f3e02065..016418a48c 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -75,6 +75,16 @@ SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
uint8_t level = 4 - (inputsize - 4) / stride;
SMMUTLBEntry *entry = NULL;
+ /*
+ * Stage-1 translation with a nested SMMU in general uses HW IOTLB. However,
+ * KVM still requests for an iommu address space for an MSI fixup by looking
+ * up stage-1 page table. Make sure we don't go through the emulated pathway
+ * so that the emulated iotlb will not need any invalidation.
+ */
+ if (bs->nested) {
+ return NULL;
+ }
+
while (level <= 3) {
uint64_t subpage_size = 1ULL << level_shift(level, tt->granule_sz);
uint64_t mask = subpage_size - 1;
@@ -110,6 +120,16 @@ void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *new)
SMMUIOTLBKey *key = g_new0(SMMUIOTLBKey, 1);
uint8_t tg = (new->granule - 10) / 2;
+ /*
+ * Stage-1 translation with a nested SMMU in general uses HW IOTLB. However,
+ * KVM still requests for an iommu address space for an MSI fixup by looking
+ * up stage-1 page table. Make sure we don't go through the emulated pathway
+ * so that the emulated iotlb will not need any invalidation.
+ */
+ if (bs->nested) {
+ return;
+ }
+
if (g_hash_table_size(bs->iotlb) >= SMMU_IOTLB_MAX_SIZE) {
smmu_iotlb_inv_all(bs);
}
--
2.41.0.windows.1

View File

@ -0,0 +1,68 @@
From 2fea4f93632679afcb15f0c35b3d9abeede37778 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Wed, 10 Apr 2024 16:37:25 +0000
Subject: [PATCH] hw/arm/smmu-common: Extract smmu_get_sbus and smmu_get_sdev
helpers
Add two helpers to get sbus and sdev respectively. These will be used
by the following patch adding set/unset_iommu_device ops.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
hw/arm/smmu-common.c | 24 +++++++++++++++++++-----
1 file changed, 19 insertions(+), 5 deletions(-)
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 016418a48c..03d9ff58d4 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -589,12 +589,9 @@ SMMUPciBus *smmu_find_smmu_pcibus(SMMUState *s, uint8_t bus_num)
return NULL;
}
-static AddressSpace *smmu_find_add_as(PCIBus *bus, void *opaque, int devfn)
+static SMMUPciBus *smmu_get_sbus(SMMUState *s, PCIBus *bus)
{
- SMMUState *s = opaque;
SMMUPciBus *sbus = g_hash_table_lookup(s->smmu_pcibus_by_busptr, bus);
- SMMUDevice *sdev;
- static unsigned int index;
if (!sbus) {
sbus = g_malloc0(sizeof(SMMUPciBus) +
@@ -603,7 +600,15 @@ static AddressSpace *smmu_find_add_as(PCIBus *bus, void *opaque, int devfn)
g_hash_table_insert(s->smmu_pcibus_by_busptr, bus, sbus);
}
- sdev = sbus->pbdev[devfn];
+ return sbus;
+}
+
+static SMMUDevice *smmu_get_sdev(SMMUState *s, SMMUPciBus *sbus,
+ PCIBus *bus, int devfn)
+{
+ SMMUDevice *sdev = sbus->pbdev[devfn];
+ static unsigned int index;
+
if (!sdev) {
char *name = g_strdup_printf("%s-%d-%d", s->mrtypename, devfn, index++);
@@ -622,6 +627,15 @@ static AddressSpace *smmu_find_add_as(PCIBus *bus, void *opaque, int devfn)
g_free(name);
}
+ return sdev;
+}
+
+static AddressSpace *smmu_find_add_as(PCIBus *bus, void *opaque, int devfn)
+{
+ SMMUState *s = opaque;
+ SMMUPciBus *sbus = smmu_get_sbus(s, bus);
+ SMMUDevice *sdev = smmu_get_sdev(s, sbus, bus, devfn);
+
return &sdev->as;
}
--
2.41.0.windows.1

View File

@ -0,0 +1,114 @@
From d8d7f775b602a84c37b8aced11e00cb5b0521c4e Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Tue, 18 Jun 2024 17:22:18 -0700
Subject: [PATCH] hw/arm/smmu-common: Replace smmu_iommu_mr with smmu_find_sdev
The caller of smmu_iommu_mr wants to get sdev for smmuv3_flush_config().
Do it directly instead of bridging with an iommu mr pointer.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Message-id: 20240619002218.926674-1-nicolinc@nvidia.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
hw/arm/smmu-common.c | 8 ++------
hw/arm/smmuv3.c | 12 ++++--------
include/hw/arm/smmu-common.h | 4 ++--
3 files changed, 8 insertions(+), 16 deletions(-)
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 9e9af8f5c7..d0bc620606 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -837,20 +837,16 @@ static const PCIIOMMUOps smmu_ops = {
.unset_iommu_device = smmu_dev_unset_iommu_device,
};
-IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid)
+SMMUDevice *smmu_find_sdev(SMMUState *s, uint32_t sid)
{
uint8_t bus_n, devfn;
SMMUPciBus *smmu_bus;
- SMMUDevice *smmu;
bus_n = PCI_BUS_NUM(sid);
smmu_bus = smmu_find_smmu_pcibus(s, bus_n);
if (smmu_bus) {
devfn = SMMU_PCI_DEVFN(sid);
- smmu = smmu_bus->pbdev[devfn];
- if (smmu) {
- return &smmu->iommu;
- }
+ return smmu_bus->pbdev[devfn];
}
return NULL;
}
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 9d44bb19bc..b2ffe2d40b 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1407,20 +1407,18 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
case SMMU_CMD_CFGI_STE:
{
uint32_t sid = CMD_SID(&cmd);
- IOMMUMemoryRegion *mr = smmu_iommu_mr(bs, sid);
- SMMUDevice *sdev;
+ SMMUDevice *sdev = smmu_find_sdev(bs, sid);
if (CMD_SSEC(&cmd)) {
cmd_error = SMMU_CERROR_ILL;
break;
}
- if (!mr) {
+ if (!sdev) {
break;
}
trace_smmuv3_cmdq_cfgi_ste(sid);
- sdev = container_of(mr, SMMUDevice, iommu);
smmuv3_flush_config(sdev);
smmuv3_install_nested_ste(sdev, sid);
@@ -1452,20 +1450,18 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
case SMMU_CMD_CFGI_CD_ALL:
{
uint32_t sid = CMD_SID(&cmd);
- IOMMUMemoryRegion *mr = smmu_iommu_mr(bs, sid);
- SMMUDevice *sdev;
+ SMMUDevice *sdev = smmu_find_sdev(bs, sid);
if (CMD_SSEC(&cmd)) {
cmd_error = SMMU_CERROR_ILL;
break;
}
- if (!mr) {
+ if (!sdev) {
break;
}
trace_smmuv3_cmdq_cfgi_cd(sid);
- sdev = container_of(mr, SMMUDevice, iommu);
smmuv3_flush_config(sdev);
break;
}
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
index 955ca716a5..e30539a8d4 100644
--- a/include/hw/arm/smmu-common.h
+++ b/include/hw/arm/smmu-common.h
@@ -234,8 +234,8 @@ int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
*/
SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova);
-/* Return the iommu mr associated to @sid, or NULL if none */
-IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid);
+/* Return the SMMUDevice associated to @sid, or NULL if none */
+SMMUDevice *smmu_find_sdev(SMMUState *s, uint32_t sid);
#define SMMU_IOTLB_MAX_SIZE 256
--
2.41.0.windows.1

View File

@ -0,0 +1,87 @@
From 3c6c29612d5ca0ff07bcb8a45735a3877c8fadd4 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Thu, 7 Dec 2023 20:04:47 +0000
Subject: [PATCH] hw/arm/smmu-common: Return sysmem if stage-1 is bypassed
When nested translation is enabled, there are 2-stage translations occurring
to two different address spaces: stage-1 in the iommu as, while stage-2 in
the system as.
If a device attached to the vSMMU doesn't enable stage-1 translation, e.g.
the vSTE is set to Config=Bypass, the system as should be returned, so QEMU can
set up system memory mappings onto the stage-2 page table.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
hw/arm/smmu-common.c | 18 +++++++++++++++++-
include/hw/arm/smmu-common.h | 3 +++
2 files changed, 20 insertions(+), 1 deletion(-)
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index a79eb34277..cc41bf3de8 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -622,6 +622,9 @@ static SMMUDevice *smmu_get_sdev(SMMUState *s, SMMUPciBus *sbus,
memory_region_init_iommu(&sdev->iommu, sizeof(sdev->iommu),
s->mrtypename,
OBJECT(s), name, UINT64_MAX);
+ if (s->nested) {
+ address_space_init(&sdev->as_sysmem, &s->root, name);
+ }
address_space_init(&sdev->as,
MEMORY_REGION(&sdev->iommu), name);
trace_smmu_add_mr(name);
@@ -637,7 +640,12 @@ static AddressSpace *smmu_find_add_as(PCIBus *bus, void *opaque, int devfn)
SMMUPciBus *sbus = smmu_get_sbus(s, bus);
SMMUDevice *sdev = smmu_get_sdev(s, sbus, bus, devfn);
- return &sdev->as;
+ /* Return the system as if the device uses stage-2 only */
+ if (s->nested && !sdev->s1_hwpt) {
+ return &sdev->as_sysmem;
+ } else {
+ return &sdev->as;
+ }
}
static bool smmu_dev_attach_viommu(SMMUDevice *sdev,
@@ -983,6 +991,14 @@ static void smmu_base_realize(DeviceState *dev, Error **errp)
g_free, g_free);
s->smmu_pcibus_by_busptr = g_hash_table_new(NULL, NULL);
+ if (s->nested) {
+ memory_region_init(&s->root, OBJECT(s), "root", UINT64_MAX);
+ memory_region_init_alias(&s->sysmem, OBJECT(s),
+ "smmu-sysmem", get_system_memory(), 0,
+ memory_region_size(get_system_memory()));
+ memory_region_add_subregion(&s->root, 0, &s->sysmem);
+ }
+
if (s->primary_bus) {
pci_setup_iommu(s->primary_bus, &smmu_ops, s);
} else {
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
index 66dc7206ea..37dfeed026 100644
--- a/include/hw/arm/smmu-common.h
+++ b/include/hw/arm/smmu-common.h
@@ -143,6 +143,7 @@ typedef struct SMMUDevice {
SMMUViommu *viommu;
SMMUS1Hwpt *s1_hwpt;
AddressSpace as;
+ AddressSpace as_sysmem;
uint32_t cfg_cache_hits;
uint32_t cfg_cache_misses;
QLIST_ENTRY(SMMUDevice) next;
@@ -165,7 +166,9 @@ struct SMMUState {
/* <private> */
SysBusDevice dev;
const char *mrtypename;
+ MemoryRegion root;
MemoryRegion iomem;
+ MemoryRegion sysmem;
/* Nested SMMU */
bool nested;
--
2.41.0.windows.1

View File

@ -0,0 +1,233 @@
From 9895192512af4b52aff88432618a474e69b44bdd Mon Sep 17 00:00:00 2001
From: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Date: Wed, 6 Nov 2024 14:47:27 +0000
Subject: [PATCH] hw/arm/smmuv3: Add initial support for SMMUv3 Nested device
Based on SMMUv3 as a parent device, add a user-creatable
smmuv3-nested device. Subsequent patches will add support to
specify a PCI bus for this device.
Currently only supported for "virt", so hook up the sysbus mem & irq
for that as well.
No FDT support is added for now.
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
hw/arm/smmuv3.c | 34 ++++++++++++++++++++++++++++++++++
hw/arm/virt.c | 31 +++++++++++++++++++++++++++++--
hw/core/sysbus-fdt.c | 1 +
include/hw/arm/smmuv3.h | 15 +++++++++++++++
include/hw/arm/virt.h | 6 ++++++
5 files changed, 85 insertions(+), 2 deletions(-)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index b860c8385f..3010471cdc 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -2069,6 +2069,19 @@ static void smmu_realize(DeviceState *d, Error **errp)
smmu_init_irq(s, dev);
}
+static void smmu_nested_realize(DeviceState *d, Error **errp)
+{
+ SMMUv3NestedState *s_nested = ARM_SMMUV3_NESTED(d);
+ SMMUv3NestedClass *c = ARM_SMMUV3_NESTED_GET_CLASS(s_nested);
+ Error *local_err = NULL;
+
+ c->parent_realize(d, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+}
+
static const VMStateDescription vmstate_smmuv3_queue = {
.name = "smmuv3_queue",
.version_id = 1,
@@ -2167,6 +2180,18 @@ static void smmuv3_class_init(ObjectClass *klass, void *data)
device_class_set_props(dc, smmuv3_properties);
}
+static void smmuv3_nested_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ SMMUv3NestedClass *c = ARM_SMMUV3_NESTED_CLASS(klass);
+
+ dc->vmsd = &vmstate_smmuv3;
+ device_class_set_parent_realize(dc, smmu_nested_realize,
+ &c->parent_realize);
+ dc->user_creatable = true;
+ dc->hotpluggable = false;
+}
+
static int smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu,
IOMMUNotifierFlag old,
IOMMUNotifierFlag new,
@@ -2205,6 +2230,14 @@ static void smmuv3_iommu_memory_region_class_init(ObjectClass *klass,
imrc->notify_flag_changed = smmuv3_notify_flag_changed;
}
+static const TypeInfo smmuv3_nested_type_info = {
+ .name = TYPE_ARM_SMMUV3_NESTED,
+ .parent = TYPE_ARM_SMMUV3,
+ .instance_size = sizeof(SMMUv3NestedState),
+ .class_size = sizeof(SMMUv3NestedClass),
+ .class_init = smmuv3_nested_class_init,
+};
+
static const TypeInfo smmuv3_type_info = {
.name = TYPE_ARM_SMMUV3,
.parent = TYPE_ARM_SMMU,
@@ -2223,6 +2256,7 @@ static const TypeInfo smmuv3_iommu_memory_region_info = {
static void smmuv3_register_types(void)
{
type_register(&smmuv3_type_info);
+ type_register(&smmuv3_nested_type_info);
type_register(&smmuv3_iommu_memory_region_info);
}
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 08c40c314b..a55f297af2 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -166,6 +166,7 @@ static const MemMapEntry base_memmap[] = {
/* In the virtCCA scenario, this space is used for MSI interrupt mapping */
[VIRT_CVM_MSI] = { 0x0a001000, 0x00fff000 },
[VIRT_CPUFREQ] = { 0x0b000000, 0x00010000 },
+ [VIRT_SMMU_NESTED] = { 0x0b010000, 0x00ff0000},
/* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
[VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 },
[VIRT_SECURE_MEM] = { 0x0e000000, 0x01000000 },
@@ -211,6 +212,7 @@ static const int a15irqmap[] = {
[VIRT_GIC_V2M] = 48, /* ...to 48 + NUM_GICV2M_SPIS - 1 */
[VIRT_SMMU] = 74, /* ...to 74 + NUM_SMMU_IRQS - 1 */
[VIRT_PLATFORM_BUS] = 112, /* ...to 112 + PLATFORM_BUS_NUM_IRQS -1 */
+ [VIRT_SMMU_NESTED] = 200,
};
static const char *valid_cpus[] = {
@@ -3613,10 +3615,34 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(hotplug_dev);
+ MachineClass *mc = MACHINE_GET_CLASS(vms);
- if (vms->platform_bus_dev) {
- MachineClass *mc = MACHINE_GET_CLASS(vms);
+ /* For smmuv3-nested devices we need to set the mem & irq */
+ if (device_is_dynamic_sysbus(mc, dev) &&
+ object_dynamic_cast(OBJECT(dev), TYPE_ARM_SMMUV3_NESTED)) {
+ hwaddr base = vms->memmap[VIRT_SMMU_NESTED].base;
+ int irq = vms->irqmap[VIRT_SMMU_NESTED];
+
+ if (vms->smmu_nested_count >= MAX_SMMU_NESTED) {
+ error_setg(errp, "smmuv3-nested max count reached!");
+ return;
+ }
+
+ base += (vms->smmu_nested_count * SMMU_IO_LEN);
+ irq += (vms->smmu_nested_count * NUM_SMMU_IRQS);
+ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base);
+ for (int i = 0; i < 4; i++) {
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), i,
+ qdev_get_gpio_in(vms->gic, irq + i));
+ }
+ if (vms->iommu != VIRT_IOMMU_SMMUV3_NESTED) {
+ vms->iommu = VIRT_IOMMU_SMMUV3_NESTED;
+ }
+ vms->smmu_nested_count++;
+ }
+
+ if (vms->platform_bus_dev) {
if (device_is_dynamic_sysbus(mc, dev)) {
platform_bus_link_device(PLATFORM_BUS_DEVICE(vms->platform_bus_dev),
SYS_BUS_DEVICE(dev));
@@ -3789,6 +3815,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE);
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_PLATFORM);
+ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_ARM_SMMUV3_NESTED);
#ifdef CONFIG_TPM
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS);
#endif
diff --git a/hw/core/sysbus-fdt.c b/hw/core/sysbus-fdt.c
index eebcd28f9a..0f0d0b3e58 100644
--- a/hw/core/sysbus-fdt.c
+++ b/hw/core/sysbus-fdt.c
@@ -489,6 +489,7 @@ static const BindingEntry bindings[] = {
#ifdef CONFIG_LINUX
TYPE_BINDING(TYPE_VFIO_CALXEDA_XGMAC, add_calxeda_midway_xgmac_fdt_node),
TYPE_BINDING(TYPE_VFIO_AMD_XGBE, add_amd_xgbe_fdt_node),
+ TYPE_BINDING("arm-smmuv3-nested", no_fdt_node),
VFIO_PLATFORM_BINDING("amd,xgbe-seattle-v1a", add_amd_xgbe_fdt_node),
#endif
#ifdef CONFIG_TPM
diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h
index d183a62766..87e628be7a 100644
--- a/include/hw/arm/smmuv3.h
+++ b/include/hw/arm/smmuv3.h
@@ -84,6 +84,21 @@ struct SMMUv3Class {
#define TYPE_ARM_SMMUV3 "arm-smmuv3"
OBJECT_DECLARE_TYPE(SMMUv3State, SMMUv3Class, ARM_SMMUV3)
+#define TYPE_ARM_SMMUV3_NESTED "arm-smmuv3-nested"
+OBJECT_DECLARE_TYPE(SMMUv3NestedState, SMMUv3NestedClass, ARM_SMMUV3_NESTED)
+
+struct SMMUv3NestedState {
+ SMMUv3State smmuv3_state;
+};
+
+struct SMMUv3NestedClass {
+ /*< private >*/
+ SMMUv3Class smmuv3_class;
+ /*< public >*/
+
+ DeviceRealize parent_realize;
+};
+
#define STAGE1_SUPPORTED(s) FIELD_EX32(s->idr[0], IDR0, S1P)
#define STAGE2_SUPPORTED(s) FIELD_EX32(s->idr[0], IDR0, S2P)
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index e6a449becd..cd41e28202 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -109,6 +109,9 @@ typedef enum {
/* MMIO region size for SMMUv3 */
#define SMMU_IO_LEN 0x20000
+/* Max supported nested SMMUv3 */
+#define MAX_SMMU_NESTED 64
+
enum {
VIRT_FLASH,
VIRT_MEM,
@@ -121,6 +124,7 @@ enum {
VIRT_GIC_ITS,
VIRT_GIC_REDIST,
VIRT_SMMU,
+ VIRT_SMMU_NESTED,
VIRT_UART,
VIRT_CPUFREQ,
VIRT_MMIO,
@@ -155,6 +159,7 @@ enum {
typedef enum VirtIOMMUType {
VIRT_IOMMU_NONE,
VIRT_IOMMU_SMMUV3,
+ VIRT_IOMMU_SMMUV3_NESTED,
VIRT_IOMMU_VIRTIO,
} VirtIOMMUType;
@@ -222,6 +227,7 @@ struct VirtMachineState {
bool mte;
bool dtb_randomness;
bool pmu;
+ int smmu_nested_count;
OnOffAuto acpi;
VirtGICType gic_version;
VirtIOMMUType iommu;
--
2.41.0.windows.1

View File

@ -0,0 +1,92 @@
From 707bd8198642549595f11ef34c80094fbf7d2de1 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Mon, 29 Apr 2024 21:26:41 +0000
Subject: [PATCH] hw/arm/smmuv3: Add missing STE invalidation
Multiple STEs can be invalidated in a range via SMMU_CMD_CFGI_STE_RANGE
or SMMU_CMD_CFGI_ALL command.
Add the missing STE invalidation in this pathway.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
hw/arm/smmu-internal.h | 1 +
hw/arm/smmuv3.c | 28 +++++++++++++++++++++++++---
2 files changed, 26 insertions(+), 3 deletions(-)
diff --git a/hw/arm/smmu-internal.h b/hw/arm/smmu-internal.h
index 843bebb185..5a81dd1b82 100644
--- a/hw/arm/smmu-internal.h
+++ b/hw/arm/smmu-internal.h
@@ -142,6 +142,7 @@ typedef struct SMMUIOTLBPageInvInfo {
} SMMUIOTLBPageInvInfo;
typedef struct SMMUSIDRange {
+ SMMUState *state;
uint32_t start;
uint32_t end;
} SMMUSIDRange;
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 540831ab8e..9d44bb19bc 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1322,11 +1322,9 @@ static void smmuv3_install_nested_ste(SMMUDevice *sdev, int sid)
}
static gboolean
-smmuv3_invalidate_ste(gpointer key, gpointer value, gpointer user_data)
+_smmuv3_invalidate_ste(SMMUDevice *sdev, SMMUSIDRange *sid_range)
{
- SMMUDevice *sdev = (SMMUDevice *)key;
uint32_t sid = smmu_get_sid(sdev);
- SMMUSIDRange *sid_range = (SMMUSIDRange *)user_data;
if (sid < sid_range->start || sid > sid_range->end) {
return false;
@@ -1337,6 +1335,28 @@ smmuv3_invalidate_ste(gpointer key, gpointer value, gpointer user_data)
return true;
}
+static gboolean
+smmuv3_invalidate_ste(gpointer key, gpointer value, gpointer user_data)
+{
+ return _smmuv3_invalidate_ste((SMMUDevice *)key, (SMMUSIDRange *)user_data);
+}
+
+static void smmuv3_invalidate_nested_ste(SMMUSIDRange *sid_range)
+{
+ SMMUState *bs = sid_range->state;
+ SMMUDevice *sdev;
+
+ if (!bs->viommu) {
+ return;
+ }
+
+ QLIST_FOREACH(sdev, &bs->viommu->device_list, next) {
+ if (smmu_get_sid(sdev)) {
+ _smmuv3_invalidate_ste(sdev, sid_range);
+ }
+ }
+}
+
static int smmuv3_cmdq_consume(SMMUv3State *s)
{
SMMUState *bs = ARM_SMMU(s);
@@ -1418,12 +1438,14 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
}
mask = (1ULL << (range + 1)) - 1;
+ sid_range.state = bs;
sid_range.start = sid & ~mask;
sid_range.end = sid_range.start + mask;
trace_smmuv3_cmdq_cfgi_ste_range(sid_range.start, sid_range.end);
g_hash_table_foreach_remove(bs->configs, smmuv3_invalidate_ste,
&sid_range);
+ smmuv3_invalidate_nested_ste(&sid_range);
break;
}
case SMMU_CMD_CFGI_CD:
--
2.41.0.windows.1

View File

@ -0,0 +1,255 @@
From 13b84313c9f7ca4823abdbad92baf091c337861e Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Fri, 21 Apr 2023 15:13:53 -0700
Subject: [PATCH] hw/arm/smmuv3: Add smmu_dev_install_nested_ste() for CFGI_STE
Call smmu_dev_install_nested_ste and eventually down to IOMMU_HWPT_ALLOC
ioctl for a nested HWPT allocation.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
hw/arm/smmu-common.c | 9 ++++
hw/arm/smmuv3-internal.h | 1 +
hw/arm/smmuv3.c | 97 +++++++++++++++++++++++++++++++++++-
hw/arm/trace-events | 1 +
include/hw/arm/smmu-common.h | 14 ++++++
5 files changed, 120 insertions(+), 2 deletions(-)
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index cc41bf3de8..9e9af8f5c7 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -780,6 +780,7 @@ static bool smmu_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
static void smmu_dev_unset_iommu_device(PCIBus *bus, void *opaque, int devfn)
{
+ SMMUVdev *vdev;
SMMUDevice *sdev;
SMMUViommu *viommu;
SMMUState *s = opaque;
@@ -803,13 +804,21 @@ static void smmu_dev_unset_iommu_device(PCIBus *bus, void *opaque, int devfn)
error_report("Unable to attach dev to the default HW pagetable");
}
+ vdev = sdev->vdev;
viommu = sdev->viommu;
sdev->idev = NULL;
sdev->viommu = NULL;
+ sdev->vdev = NULL;
QLIST_REMOVE(sdev, next);
trace_smmu_unset_iommu_device(devfn, smmu_get_sid(sdev));
+ if (vdev) {
+ iommufd_backend_free_id(viommu->iommufd, vdev->core->vdev_id);
+ g_free(vdev->core);
+ g_free(vdev);
+ }
+
if (QLIST_EMPTY(&viommu->device_list)) {
iommufd_backend_free_id(viommu->iommufd, viommu->bypass_hwpt_id);
iommufd_backend_free_id(viommu->iommufd, viommu->abort_hwpt_id);
diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
index 6076025ad6..163459d450 100644
--- a/hw/arm/smmuv3-internal.h
+++ b/hw/arm/smmuv3-internal.h
@@ -552,6 +552,7 @@ typedef struct CD {
#define STE_S1FMT(x) extract32((x)->word[0], 4 , 2)
#define STE_S1CDMAX(x) extract32((x)->word[1], 27, 5)
+#define STE_S1DSS(x) extract32((x)->word[2], 0, 2)
#define STE_S1STALLD(x) extract32((x)->word[2], 27, 1)
#define STE_EATS(x) extract32((x)->word[2], 28, 2)
#define STE_STRW(x) extract32((x)->word[2], 30, 2)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 253d297eec..540831ab8e 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -563,6 +563,27 @@ bad_ste:
return -EINVAL;
}
+static void decode_ste_config(SMMUTransCfg *cfg, uint32_t config)
+{
+
+ if (STE_CFG_ABORT(config)) {
+ cfg->aborted = true;
+ return;
+ }
+ if (STE_CFG_BYPASS(config)) {
+ cfg->bypassed = true;
+ return;
+ }
+
+ if (STE_CFG_S1_ENABLED(config)) {
+ cfg->stage = SMMU_STAGE_1;
+ }
+
+ if (STE_CFG_S2_ENABLED(config)) {
+ cfg->stage |= SMMU_STAGE_2;
+ }
+}
+
/* Returns < 0 in case of invalid STE, 0 otherwise */
static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
STE *ste, SMMUEventInfo *event)
@@ -579,12 +600,19 @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
config = STE_CONFIG(ste);
- if (STE_CFG_ABORT(config)) {
+ decode_ste_config(cfg, config);
+
+ /* S1DSS.Terminate is same as Config.abort for default stream */
+ if (STE_CFG_S1_ENABLED(config) && STE_S1DSS(ste) == 0) {
cfg->aborted = true;
+ }
+
+ if (cfg->aborted || cfg->bypassed) {
return 0;
}
- if (STE_CFG_BYPASS(config)) {
+ /* S1DSS.Bypass is same as Config.bypass for default stream */
+ if (STE_CFG_S1_ENABLED(config) && STE_S1DSS(ste) == 0x1) {
cfg->bypassed = true;
return 0;
}
@@ -1231,6 +1259,68 @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd)
}
}
+static void smmuv3_install_nested_ste(SMMUDevice *sdev, int sid)
+{
+#ifdef __linux__
+ SMMUEventInfo event = {.type = SMMU_EVT_NONE, .sid = sid,
+ .inval_ste_allowed = true};
+ struct iommu_hwpt_arm_smmuv3 nested_data = {};
+ SMMUv3State *s = sdev->smmu;
+ SMMUState *bs = &s->smmu_state;
+ uint32_t config;
+ STE ste;
+ int ret;
+
+ if (!sdev->viommu || !bs->nested) {
+ return;
+ }
+
+ if (!sdev->vdev && sdev->idev && sdev->viommu) {
+ SMMUVdev *vdev = g_new0(SMMUVdev, 1);
+ vdev->core = iommufd_backend_alloc_vdev(sdev->idev, sdev->viommu->core,
+ sid);
+ if (!vdev->core) {
+ error_report("failed to allocate a vDEVICE");
+ g_free(vdev);
+ return;
+ }
+ sdev->vdev = vdev;
+ }
+
+ ret = smmu_find_ste(sdev->smmu, sid, &ste, &event);
+ if (ret) {
+ /*
+ * For a 2-level Stream Table, the level-2 table might not be ready
+ * until the device gets inserted to the stream table. Ignore this.
+ */
+ return;
+ }
+
+ config = STE_CONFIG(&ste);
+ if (!STE_VALID(&ste) || !STE_CFG_S1_ENABLED(config)) {
+ smmu_dev_uninstall_nested_ste(sdev, STE_CFG_ABORT(config));
+ smmuv3_flush_config(sdev);
+ return;
+ }
+
+ nested_data.ste[0] = (uint64_t)ste.word[0] | (uint64_t)ste.word[1] << 32;
+ nested_data.ste[1] = (uint64_t)ste.word[2] | (uint64_t)ste.word[3] << 32;
+ /* V | CONFIG | S1FMT | S1CTXPTR | S1CDMAX */
+ nested_data.ste[0] &= 0xf80fffffffffffffULL;
+ /* S1DSS | S1CIR | S1COR | S1CSH | S1STALLD | EATS */
+ nested_data.ste[1] &= 0x380000ffULL;
+
+ ret = smmu_dev_install_nested_ste(sdev, IOMMU_HWPT_DATA_ARM_SMMUV3,
+ sizeof(nested_data), &nested_data);
+ if (ret) {
+ error_report("Unable to install nested STE=%16LX:%16LX, ret=%d",
+ nested_data.ste[1], nested_data.ste[0], ret);
+ }
+
+ trace_smmuv3_install_nested_ste(sid, nested_data.ste[1], nested_data.ste[0]);
+#endif
+}
+
static gboolean
smmuv3_invalidate_ste(gpointer key, gpointer value, gpointer user_data)
{
@@ -1241,6 +1331,8 @@ smmuv3_invalidate_ste(gpointer key, gpointer value, gpointer user_data)
if (sid < sid_range->start || sid > sid_range->end) {
return false;
}
+ smmuv3_flush_config(sdev);
+ smmuv3_install_nested_ste(sdev, sid);
trace_smmuv3_config_cache_inv(sid);
return true;
}
@@ -1310,6 +1402,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
trace_smmuv3_cmdq_cfgi_ste(sid);
sdev = container_of(mr, SMMUDevice, iommu);
smmuv3_flush_config(sdev);
+ smmuv3_install_nested_ste(sdev, sid);
break;
}
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index 1e3d86382d..490da6349c 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -57,4 +57,5 @@ smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu mr=%s
smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s"
smmuv3_get_device_info(uint32_t idr0, uint32_t idr1, uint32_t idr3, uint32_t idr5) "idr0=0x%x idr1=0x%x idr3=0x%x idr5=0x%x"
smmuv3_inv_notifiers_iova(const char *name, uint16_t asid, uint16_t vmid, uint64_t iova, uint8_t tg, uint64_t num_pages) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64
+smmuv3_install_nested_ste(uint32_t sid, uint64_t ste_1, uint64_t ste_0) "sid=%d ste=%"PRIx64":%"PRIx64
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
index d120c352cf..955ca716a5 100644
--- a/include/hw/arm/smmu-common.h
+++ b/include/hw/arm/smmu-common.h
@@ -51,6 +51,13 @@ typedef enum {
SMMU_PTW_ERR_PERMISSION, /* Permission fault */
} SMMUPTWEventType;
+/* SMMU Stage */
+typedef enum {
+ SMMU_STAGE_1 = 1,
+ SMMU_STAGE_2,
+ SMMU_NESTED,
+} SMMUStage;
+
typedef struct SMMUPTWEventInfo {
int stage;
SMMUPTWEventType type;
@@ -125,6 +132,12 @@ typedef struct SMMUViommu {
QLIST_ENTRY(SMMUViommu) next;
} SMMUViommu;
+typedef struct SMMUVdev {
+ SMMUViommu *vsmmu;
+ IOMMUFDVdev *core;
+ uint32_t sid;
+}SMMUVdev;
+
typedef struct SMMUS1Hwpt {
void *smmu;
IOMMUFDBackend *iommufd;
@@ -141,6 +154,7 @@ typedef struct SMMUDevice {
IOMMUMemoryRegion iommu;
HostIOMMUDeviceIOMMUFD *idev;
SMMUViommu *viommu;
+ SMMUVdev *vdev;
SMMUS1Hwpt *s1_hwpt;
AddressSpace as;
AddressSpace as_sysmem;
--
2.41.0.windows.1

View File

@ -0,0 +1,95 @@
From afca50145f52601d912a805b65bd4530e9278388 Mon Sep 17 00:00:00 2001
From: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Date: Wed, 6 Nov 2024 15:53:45 +0000
Subject: [PATCH] hw/arm/smmuv3: Associate a pci bus with a SMMUv3 Nested
device
Subsequent patches will add IORT modifications to get this working.
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
hw/arm/smmuv3.c | 27 +++++++++++++++++++++++++++
include/hw/arm/smmuv3.h | 2 ++
2 files changed, 29 insertions(+)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 3010471cdc..66e4e1b57d 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -24,6 +24,7 @@
#include "hw/qdev-properties.h"
#include "hw/qdev-core.h"
#include "hw/pci/pci.h"
+#include "hw/pci/pci_bridge.h"
#include "cpu.h"
#include "trace.h"
#include "qemu/log.h"
@@ -2069,12 +2070,32 @@ static void smmu_realize(DeviceState *d, Error **errp)
smmu_init_irq(s, dev);
}
+static int smmuv3_nested_pci_host_bridge(Object *obj, void *opaque)
+{
+ DeviceState *d = opaque;
+ SMMUv3NestedState *s_nested = ARM_SMMUV3_NESTED(d);
+
+ if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) {
+ PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus;
+ if (s_nested->pci_bus && !strcmp(bus->qbus.name, s_nested->pci_bus)) {
+ object_property_set_link(OBJECT(d), "primary-bus", OBJECT(bus),
+ &error_abort);
+ }
+ }
+ return 0;
+}
+
static void smmu_nested_realize(DeviceState *d, Error **errp)
{
SMMUv3NestedState *s_nested = ARM_SMMUV3_NESTED(d);
SMMUv3NestedClass *c = ARM_SMMUV3_NESTED_GET_CLASS(s_nested);
+ SysBusDevice *dev = SYS_BUS_DEVICE(d);
Error *local_err = NULL;
+ object_child_foreach_recursive(object_get_root(),
+ smmuv3_nested_pci_host_bridge, d);
+ object_property_set_bool(OBJECT(dev), "nested", true, &error_abort);
+
c->parent_realize(d, &local_err);
if (local_err) {
error_propagate(errp, local_err);
@@ -2161,6 +2182,11 @@ static Property smmuv3_properties[] = {
DEFINE_PROP_END_OF_LIST()
};
+static Property smmuv3_nested_properties[] = {
+ DEFINE_PROP_STRING("pci-bus", SMMUv3NestedState, pci_bus),
+ DEFINE_PROP_END_OF_LIST()
+};
+
static void smmuv3_instance_init(Object *obj)
{
/* Nothing much to do here as of now */
@@ -2188,6 +2214,7 @@ static void smmuv3_nested_class_init(ObjectClass *klass, void *data)
dc->vmsd = &vmstate_smmuv3;
device_class_set_parent_realize(dc, smmu_nested_realize,
&c->parent_realize);
+ device_class_set_props(dc, smmuv3_nested_properties);
dc->user_creatable = true;
dc->hotpluggable = false;
}
diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h
index 87e628be7a..96513fce56 100644
--- a/include/hw/arm/smmuv3.h
+++ b/include/hw/arm/smmuv3.h
@@ -89,6 +89,8 @@ OBJECT_DECLARE_TYPE(SMMUv3NestedState, SMMUv3NestedClass, ARM_SMMUV3_NESTED)
struct SMMUv3NestedState {
SMMUv3State smmuv3_state;
+
+ char *pci_bus;
};
struct SMMUv3NestedClass {
--
2.41.0.windows.1

View File

@ -0,0 +1,38 @@
From fac9784bbedb50dc964feb9cf70b6f37472fcf60 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Fri, 21 Apr 2023 22:10:44 -0700
Subject: [PATCH] hw/arm/smmuv3: Check idr registers for STE_S1CDMAX and
STE_S1STALLD
With nested translation, the underlying HW could support those two fields.
Allow them according to the updated idr registers after the hw_info ioctl.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
hw/arm/smmuv3.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 4208325ab3..253d297eec 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -622,13 +622,14 @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
}
}
- if (STE_S1CDMAX(ste) != 0) {
+ if (!FIELD_EX32(s->idr[1], IDR1, SSIDSIZE) && STE_S1CDMAX(ste) != 0) {
qemu_log_mask(LOG_UNIMP,
"SMMUv3 does not support multiple context descriptors yet\n");
goto bad_ste;
}
- if (STE_S1STALLD(ste)) {
+ /* STALL_MODEL being 0b01 means "stall is not supported" */
+ if ((FIELD_EX32(s->idr[0], IDR0, STALL_MODEL) & 0x1) && STE_S1STALLD(ste)) {
qemu_log_mask(LOG_UNIMP,
"SMMUv3 S1 stalling fault model not allowed yet\n");
goto bad_ste;
--
2.41.0.windows.1

View File

@ -0,0 +1,76 @@
From c8267f88b2af37779a597aac00aeaf06adc80ccc Mon Sep 17 00:00:00 2001
From: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Date: Mon, 11 Dec 2023 14:42:01 +0000
Subject: [PATCH] hw/arm/smmuv3: Enable sva/stall IDR features
Emulate features that will enable the stall and sva feature in Guest.
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
hw/arm/smmuv3-internal.h | 3 ++-
hw/arm/smmuv3.c | 8 +++-----
2 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
index a411fd4048..cfc04c563e 100644
--- a/hw/arm/smmuv3-internal.h
+++ b/hw/arm/smmuv3-internal.h
@@ -74,6 +74,7 @@ REG32(IDR1, 0x4)
FIELD(IDR1, ECMDQ, 31, 1)
#define SMMU_IDR1_SIDSIZE 16
+#define SMMU_IDR1_SSIDSIZE 16
#define SMMU_CMDQS 19
#define SMMU_EVENTQS 19
@@ -104,7 +105,7 @@ REG32(IDR5, 0x14)
FIELD(IDR5, VAX, 10, 2);
FIELD(IDR5, STALL_MAX, 16, 16);
-#define SMMU_IDR5_OAS 4
+#define SMMU_IDR5_OAS 5
REG32(IIDR, 0x18)
REG32(AIDR, 0x1c)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 66e4e1b57d..8d8dcccd48 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -343,13 +343,14 @@ static void smmuv3_init_regs(SMMUv3State *s)
s->idr[0] = FIELD_DP32(s->idr[0], IDR0, ASID16, 1); /* 16-bit ASID */
s->idr[0] = FIELD_DP32(s->idr[0], IDR0, VMID16, 1); /* 16-bit VMID */
s->idr[0] = FIELD_DP32(s->idr[0], IDR0, TTENDIAN, 2); /* little endian */
- s->idr[0] = FIELD_DP32(s->idr[0], IDR0, STALL_MODEL, 1); /* No stall */
+ s->idr[0] = FIELD_DP32(s->idr[0], IDR0, STALL_MODEL, 0); /* stall */
/* terminated transaction will always be aborted/error returned */
s->idr[0] = FIELD_DP32(s->idr[0], IDR0, TERM_MODEL, 1);
/* 2-level stream table supported */
s->idr[0] = FIELD_DP32(s->idr[0], IDR0, STLEVEL, 1);
s->idr[1] = FIELD_DP32(s->idr[1], IDR1, SIDSIZE, SMMU_IDR1_SIDSIZE);
+ s->idr[1] = FIELD_DP32(s->idr[1], IDR1, SSIDSIZE, SMMU_IDR1_SSIDSIZE);
s->idr[1] = FIELD_DP32(s->idr[1], IDR1, EVENTQS, SMMU_EVENTQS);
s->idr[1] = FIELD_DP32(s->idr[1], IDR1, CMDQS, SMMU_CMDQS);
@@ -361,7 +362,7 @@ static void smmuv3_init_regs(SMMUv3State *s)
s->idr[3] = FIELD_DP32(s->idr[3], IDR3, RIL, 1);
s->idr[3] = FIELD_DP32(s->idr[3], IDR3, BBML, 2);
- s->idr[5] = FIELD_DP32(s->idr[5], IDR5, OAS, SMMU_IDR5_OAS); /* 44 bits */
+ s->idr[5] = FIELD_DP32(s->idr[5], IDR5, OAS, SMMU_IDR5_OAS); /* 48 bits */
/* 4K, 16K and 64K granule support */
s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN4K, 1);
s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN16K, 1);
@@ -776,9 +777,6 @@ static int decode_cd(SMMUTransCfg *cfg, CD *cd, SMMUEventInfo *event)
if (!CD_A(cd)) {
goto bad_cd; /* SMMU_IDR0.TERM_MODEL == 1 */
}
- if (CD_S(cd)) {
- goto bad_cd; /* !STE_SECURE && SMMU_IDR0.STALL_MODEL == 1 */
- }
if (CD_HA(cd) || CD_HD(cd)) {
goto bad_cd; /* HTTU = 0 */
}
--
2.41.0.windows.1

View File

@ -0,0 +1,229 @@
From b331acc42fa54ca93496c32d92cdf5397927bff1 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Fri, 21 Apr 2023 15:18:56 -0700
Subject: [PATCH] hw/arm/smmuv3: Forward cache invalidate commands via iommufd
Introduce an SMMUCommandBatch and some helpers to batch the commands.
Rewind the q->cons accordingly when it fails to execute a batch/command.
Currently, TLBI commands and device cache commands are batched separately to
avoid an erratum on certain versions of SMMUs. Later, the IIDR register should
be checked to detect whether the underlying SMMU hw has such an erratum.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
hw/arm/smmuv3-internal.h | 13 +++++
hw/arm/smmuv3.c | 113 ++++++++++++++++++++++++++++++++++++++-
2 files changed, 125 insertions(+), 1 deletion(-)
diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
index 163459d450..a411fd4048 100644
--- a/hw/arm/smmuv3-internal.h
+++ b/hw/arm/smmuv3-internal.h
@@ -226,6 +226,19 @@ static inline bool smmuv3_gerror_irq_enabled(SMMUv3State *s)
#define Q_CONS_WRAP(q) (((q)->cons & WRAP_MASK(q)) >> (q)->log2size)
#define Q_PROD_WRAP(q) (((q)->prod & WRAP_MASK(q)) >> (q)->log2size)
+#define Q_IDX(llq, p) ((p) & ((1 << (llq)->max_n_shift) - 1))
+
+static inline int smmuv3_q_ncmds(SMMUQueue *q)
+{
+ uint32_t prod = Q_PROD(q);
+ uint32_t cons = Q_CONS(q);
+
+ if (Q_PROD_WRAP(q) == Q_CONS_WRAP(q))
+ return prod - cons;
+ else
+ return WRAP_MASK(q) - cons + prod;
+}
+
static inline bool smmuv3_q_full(SMMUQueue *q)
{
return ((q->cons ^ q->prod) & WRAP_INDEX_MASK(q)) == WRAP_MASK(q);
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index b2ffe2d40b..b860c8385f 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1357,16 +1357,85 @@ static void smmuv3_invalidate_nested_ste(SMMUSIDRange *sid_range)
}
}
+/**
+ * SMMUCommandBatch - batch of commands to issue for nested SMMU invalidation
+ * @cmds: Pointer to list of commands
+ * @cons: Pointer to list of CONS corresponding to the commands
+ * @ncmds: Total ncmds in the batch
+ * @dev_cache: Issue to a device cache
+ */
+typedef struct SMMUCommandBatch {
+ Cmd *cmds;
+ uint32_t *cons;
+ uint32_t ncmds;
+ bool dev_cache;
+} SMMUCommandBatch;
+
+/* Update batch->ncmds to the number of execute cmds */
+static int smmuv3_issue_cmd_batch(SMMUState *bs, SMMUCommandBatch *batch)
+{
+ uint32_t total = batch->ncmds;
+ int ret;
+
+ ret = smmu_viommu_invalidate_cache(bs->viommu->core,
+ IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3,
+ sizeof(Cmd), &batch->ncmds, batch->cmds);
+ if (total != batch->ncmds) {
+ error_report("%s failed: ret=%d, total=%d, done=%d",
+ __func__, ret, total, batch->ncmds);
+ return ret;
+ }
+
+ batch->ncmds = 0;
+ batch->dev_cache = false;
+ return ret;
+}
+
+static int smmuv3_batch_cmds(SMMUState *bs, SMMUCommandBatch *batch,
+ Cmd *cmd, uint32_t *cons, bool dev_cache)
+{
+ int ret;
+
+ if (!bs->nested || !bs->viommu) {
+ return 0;
+ }
+
+ /*
+ * Currently separate dev_cache and hwpt for safety, which might not be
+ * necessary if underlying HW SMMU does not have the errata.
+ *
+ * TODO check IIDR register values read from hw_info.
+ */
+ if (batch->ncmds && (dev_cache != batch->dev_cache)) {
+ ret = smmuv3_issue_cmd_batch(bs, batch);
+ if (ret) {
+ *cons = batch->cons[batch->ncmds];
+ return ret;
+ }
+ }
+ batch->dev_cache = dev_cache;
+ batch->cmds[batch->ncmds] = *cmd;
+ batch->cons[batch->ncmds++] = *cons;
+ return 0;
+}
+
static int smmuv3_cmdq_consume(SMMUv3State *s)
{
SMMUState *bs = ARM_SMMU(s);
SMMUCmdError cmd_error = SMMU_CERROR_NONE;
SMMUQueue *q = &s->cmdq;
SMMUCommandType type = 0;
+ SMMUCommandBatch batch = {};
+ uint32_t ncmds = 0;
if (!smmuv3_cmdq_enabled(s)) {
return 0;
}
+
+ ncmds = smmuv3_q_ncmds(q);
+ batch.cmds = g_new0(Cmd, ncmds);
+ batch.cons = g_new0(uint32_t, ncmds);
+
/*
* some commands depend on register values, typically CR0. In case those
* register values change while handling the command, spec says it
@@ -1463,6 +1532,13 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
trace_smmuv3_cmdq_cfgi_cd(sid);
smmuv3_flush_config(sdev);
+
+ if (sdev->s1_hwpt) {
+ if (smmuv3_batch_cmds(sdev->smmu, &batch, &cmd, &q->cons, true)) {
+ cmd_error = SMMU_CERROR_ILL;
+ break;
+ }
+ }
break;
}
case SMMU_CMD_TLBI_NH_ASID:
@@ -1477,6 +1553,10 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
trace_smmuv3_cmdq_tlbi_nh_asid(asid);
smmu_inv_notifiers_all(&s->smmu_state);
smmu_iotlb_inv_asid(bs, asid);
+ if (smmuv3_batch_cmds(bs, &batch, &cmd, &q->cons, false)) {
+ cmd_error = SMMU_CERROR_ILL;
+ break;
+ }
break;
}
case SMMU_CMD_TLBI_NH_ALL:
@@ -1489,6 +1569,11 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
trace_smmuv3_cmdq_tlbi_nh();
smmu_inv_notifiers_all(&s->smmu_state);
smmu_iotlb_inv_all(bs);
+
+ if (smmuv3_batch_cmds(bs, &batch, &cmd, &q->cons, false)) {
+ cmd_error = SMMU_CERROR_ILL;
+ break;
+ }
break;
case SMMU_CMD_TLBI_NH_VAA:
case SMMU_CMD_TLBI_NH_VA:
@@ -1497,7 +1582,24 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
break;
}
smmuv3_range_inval(bs, &cmd);
+
+ if (smmuv3_batch_cmds(bs, &batch, &cmd, &q->cons, false)) {
+ cmd_error = SMMU_CERROR_ILL;
+ break;
+ }
break;
+ case SMMU_CMD_ATC_INV:
+ {
+ SMMUDevice *sdev = smmu_find_sdev(bs, CMD_SID(&cmd));
+
+ if (sdev->s1_hwpt) {
+ if (smmuv3_batch_cmds(sdev->smmu, &batch, &cmd, &q->cons, true)) {
+ cmd_error = SMMU_CERROR_ILL;
+ break;
+ }
+ }
+ break;
+ }
case SMMU_CMD_TLBI_S12_VMALL:
{
uint16_t vmid = CMD_VMID(&cmd);
@@ -1529,7 +1631,6 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
case SMMU_CMD_TLBI_EL2_ASID:
case SMMU_CMD_TLBI_EL2_VA:
case SMMU_CMD_TLBI_EL2_VAA:
- case SMMU_CMD_ATC_INV:
case SMMU_CMD_PRI_RESP:
case SMMU_CMD_RESUME:
case SMMU_CMD_STALL_TERM:
@@ -1554,12 +1655,22 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
*/
queue_cons_incr(q);
}
+ qemu_mutex_lock(&s->mutex);
+ if (!cmd_error && batch.ncmds && bs->viommu) {
+ if (smmuv3_issue_cmd_batch(bs, &batch)) {
+ q->cons = batch.cons[batch.ncmds];
+ cmd_error = SMMU_CERROR_ILL;
+ }
+ }
+ qemu_mutex_unlock(&s->mutex);
if (cmd_error) {
trace_smmuv3_cmdq_consume_error(smmu_cmd_string(type), cmd_error);
smmu_write_cmdq_err(s, cmd_error);
smmuv3_trigger_irq(s, SMMU_IRQ_GERROR, R_GERROR_CMDQ_ERR_MASK);
}
+ g_free(batch.cmds);
+ g_free(batch.cons);
trace_smmuv3_cmdq_consume_out(Q_PROD(q), Q_CONS(q),
Q_PROD_WRAP(q), Q_CONS_WRAP(q));
--
2.41.0.windows.1

View File

@ -0,0 +1,43 @@
From 9f3b8c283d4c1014ff292faddb78bbbfd7ec22d3 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Tue, 9 Apr 2024 01:49:26 +0000
Subject: [PATCH] hw/arm/smmuv3: Ignore IOMMU_NOTIFIER_MAP for nested-smmuv3
If a device's MemoryRegion type is iommu, vfio core registers a listener,
passing the IOMMU_NOTIFIER_IOTLB_EVENTS flag (bundle of IOMMU_NOTIFIER_MAP
and IOMMU_NOTIFIER_UNMAP).
On the other hand, nested SMMUv3 does not use a map notifier. And it would
only insert an IOTLB entry for MSI doorbell page mapping, which can simply
be done by the mr->translate call.
Ignore the IOMMU_NOTIFIER_MAP flag and drop the error out.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
hw/arm/smmuv3.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 64ca4c5542..db111220c7 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1881,12 +1881,9 @@ static int smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu,
return -EINVAL;
}
- if (new & IOMMU_NOTIFIER_MAP) {
- error_setg(errp,
- "device %02x.%02x.%x requires iommu MAP notifier which is "
- "not currently supported", pci_bus_num(sdev->bus),
- PCI_SLOT(sdev->devfn), PCI_FUNC(sdev->devfn));
- return -EINVAL;
+ /* nested-smmuv3 does not need IOMMU_NOTIFIER_MAP. Ignore it. */
+ if (s->nested) {
+ new &= ~IOMMU_NOTIFIER_MAP;
}
if (old == IOMMU_NOTIFIER_NONE) {
--
2.41.0.windows.1

View File

@ -0,0 +1,135 @@
From 03964c037862a594b4eb7d2e3754acd32c01c80b Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Thu, 22 Sep 2022 14:06:07 -0700
Subject: [PATCH] hw/arm/smmuv3: Read host SMMU device info
Read the underlying SMMU device info and set corresponding IDR bits.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
hw/arm/smmuv3.c | 77 ++++++++++++++++++++++++++++++++++++
hw/arm/trace-events | 1 +
include/hw/arm/smmu-common.h | 1 +
3 files changed, 79 insertions(+)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index db111220c7..4208325ab3 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -254,6 +254,80 @@ void smmuv3_record_event(SMMUv3State *s, SMMUEventInfo *info)
info->recorded = true;
}
+static void smmuv3_nested_init_regs(SMMUv3State *s)
+{
+ SMMUState *bs = ARM_SMMU(s);
+ SMMUDevice *sdev;
+ uint32_t data_type;
+ uint32_t val;
+ int ret;
+
+ if (!bs->nested || !bs->viommu) {
+ return;
+ }
+
+ sdev = QLIST_FIRST(&bs->viommu->device_list);
+ if (!sdev) {
+ return;
+ }
+
+ if (sdev->info.idr[0]) {
+ error_report("reusing the previous hw_info");
+ goto out;
+ }
+
+ ret = smmu_dev_get_info(sdev, &data_type, sizeof(sdev->info), &sdev->info);
+ if (ret) {
+ error_report("failed to get SMMU device info");
+ return;
+ }
+
+ if (data_type != IOMMU_HW_INFO_TYPE_ARM_SMMUV3) {
+ error_report( "Wrong data type (%d)!", data_type);
+ return;
+ }
+
+out:
+ trace_smmuv3_get_device_info(sdev->info.idr[0], sdev->info.idr[1],
+ sdev->info.idr[3], sdev->info.idr[5]);
+
+ val = FIELD_EX32(sdev->info.idr[0], IDR0, BTM);
+ s->idr[0] = FIELD_DP32(s->idr[0], IDR0, BTM, val);
+ val = FIELD_EX32(sdev->info.idr[0], IDR0, ATS);
+ s->idr[0] = FIELD_DP32(s->idr[0], IDR0, ATS, val);
+ val = FIELD_EX32(sdev->info.idr[0], IDR0, ASID16);
+ s->idr[0] = FIELD_DP32(s->idr[0], IDR0, ASID16, val);
+ val = FIELD_EX32(sdev->info.idr[0], IDR0, TERM_MODEL);
+ s->idr[0] = FIELD_DP32(s->idr[0], IDR0, TERM_MODEL, val);
+ val = FIELD_EX32(sdev->info.idr[0], IDR0, STALL_MODEL);
+ s->idr[0] = FIELD_DP32(s->idr[0], IDR0, STALL_MODEL, val);
+ val = FIELD_EX32(sdev->info.idr[0], IDR0, STLEVEL);
+ s->idr[0] = FIELD_DP32(s->idr[0], IDR0, STLEVEL, val);
+
+ val = FIELD_EX32(sdev->info.idr[1], IDR1, SIDSIZE);
+ s->idr[1] = FIELD_DP32(s->idr[1], IDR1, SIDSIZE, val);
+ val = FIELD_EX32(sdev->info.idr[1], IDR1, SSIDSIZE);
+ s->idr[1] = FIELD_DP32(s->idr[1], IDR1, SSIDSIZE, val);
+
+ val = FIELD_EX32(sdev->info.idr[3], IDR3, HAD);
+ s->idr[3] = FIELD_DP32(s->idr[3], IDR3, HAD, val);
+ val = FIELD_EX32(sdev->info.idr[3], IDR3, RIL);
+ s->idr[3] = FIELD_DP32(s->idr[3], IDR3, RIL, val);
+ val = FIELD_EX32(sdev->info.idr[3], IDR3, BBML);
+ s->idr[3] = FIELD_DP32(s->idr[3], IDR3, BBML, val);
+
+ val = FIELD_EX32(sdev->info.idr[5], IDR5, GRAN4K);
+ s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN4K, val);
+ val = FIELD_EX32(sdev->info.idr[5], IDR5, GRAN16K);
+ s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN16K, val);
+ val = FIELD_EX32(sdev->info.idr[5], IDR5, GRAN64K);
+ s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN64K, val);
+ val = FIELD_EX32(sdev->info.idr[5], IDR5, OAS);
+ s->idr[5] = FIELD_DP32(s->idr[5], IDR5, OAS, val);
+
+ /* FIXME check iidr and aidr registrs too */
+}
+
static void smmuv3_init_regs(SMMUv3State *s)
{
/* Based on sys property, the stages supported in smmu will be advertised.*/
@@ -292,6 +366,9 @@ static void smmuv3_init_regs(SMMUv3State *s)
s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN16K, 1);
s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN64K, 1);
+ /* Override IDR fields with HW caps */
+ smmuv3_nested_init_regs(s);
+
s->cmdq.base = deposit64(s->cmdq.base, 0, 5, SMMU_CMDQS);
s->cmdq.prod = 0;
s->cmdq.cons = 0;
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index 58e0636e95..1e3d86382d 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -55,5 +55,6 @@ smmuv3_cmdq_tlbi_s12_vmid(uint16_t vmid) "vmid=%d"
smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid=0x%x"
smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu mr=%s"
smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s"
+smmuv3_get_device_info(uint32_t idr0, uint32_t idr1, uint32_t idr3, uint32_t idr5) "idr0=0x%x idr1=0x%x idr3=0x%x idr5=0x%x"
smmuv3_inv_notifiers_iova(const char *name, uint16_t asid, uint16_t vmid, uint64_t iova, uint8_t tg, uint64_t num_pages) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
index 37dfeed026..d120c352cf 100644
--- a/include/hw/arm/smmu-common.h
+++ b/include/hw/arm/smmu-common.h
@@ -146,6 +146,7 @@ typedef struct SMMUDevice {
AddressSpace as_sysmem;
uint32_t cfg_cache_hits;
uint32_t cfg_cache_misses;
+ struct iommu_hw_info_arm_smmuv3 info;
QLIST_ENTRY(SMMUDevice) next;
} SMMUDevice;
--
2.41.0.windows.1

View File

@ -0,0 +1,47 @@
From a6c7b16107b506f85e6643604c923291e41f70d1 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Wed, 19 Jun 2024 04:42:33 +0000
Subject: [PATCH] hw/arm/virt: Add an SMMU_IO_LEN macro
A following patch will add a new MMIO region for nested SMMU instances.
This macro will be repeatedly used to set offsets and MMIO sizes in both
virt and virt-acpi-build.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
hw/arm/virt.c | 2 +-
include/hw/arm/virt.h | 3 +++
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 8823f2ed1c..08c40c314b 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -155,7 +155,7 @@ static const MemMapEntry base_memmap[] = {
[VIRT_FW_CFG] = { 0x09020000, 0x00000018 },
[VIRT_GPIO] = { 0x09030000, 0x00001000 },
[VIRT_SECURE_UART] = { 0x09040000, 0x00001000 },
- [VIRT_SMMU] = { 0x09050000, 0x00020000 },
+ [VIRT_SMMU] = { 0x09050000, SMMU_IO_LEN },
[VIRT_PCDIMM_ACPI] = { 0x09070000, MEMORY_HOTPLUG_IO_LEN },
[VIRT_ACPI_GED] = { 0x09080000, ACPI_GED_EVT_SEL_LEN },
[VIRT_NVDIMM_ACPI] = { 0x09090000, NVDIMM_ACPI_IO_LEN},
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 345b2d5594..e6a449becd 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -106,6 +106,9 @@ typedef enum {
ARM_L3_CACHE
} ArmCacheType;
+/* MMIO region size for SMMUv3 */
+#define SMMU_IO_LEN 0x20000
+
enum {
VIRT_FLASH,
VIRT_MEM,
--
2.41.0.windows.1

View File

@ -0,0 +1,187 @@
From 1746ba1aee671b9552540e36a629988b00846a82 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Tue, 5 Oct 2021 10:53:13 +0200
Subject: [PATCH] hw/arm/virt-acpi-build: Add IORT RMR regions to handle MSI
nested binding
To handle SMMUv3 nested stage support it is practical to
expose to the guest reserved memory regions (RMRs)
covering the IOVAs used by the host kernel to map
physical MSI doorbells.
Those IOVAs belong to [0x8000000, 0x8100000] matching
MSI_IOVA_BASE and MSI_IOVA_LENGTH definitions in kernel
arm-smmu-v3 driver. This is the window used to allocate
IOVAs matching physical MSI doorbells.
With those RMRs, the guest is forced to use a flat mapping
for this range. Hence the assigned device is programmed
with one IOVA from this range. Stage 1, owned by the guest,
has a flat mapping for this IOVA. Stage 2, owned by the VMM,
then enforces a mapping from this IOVA to the physical
MSI doorbell.
The creation of those RMR nodes is only relevant if nested
stage SMMU is in use, along with VFIO. As VFIO devices can be
hotplugged, all RMRs need to be created in advance. Hence
the patch introduces a new arm virt "nested-smmuv3" iommu type.
ARM DEN 0049E.b IORT specification also mandates that when
RMRs are present, the OS must preserve PCIe configuration
performed by the boot FW. So along with the RMR IORT nodes,
a _DSM function #5, as defined by PCI FIRMWARE SPECIFICATION
REVISION 3.3, chapter 4.6.5, is added to PCIe host bridge
and PCIe expander bridge objects.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Suggested-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
hw/arm/virt-acpi-build.c | 71 +++++++++++++++++++++++++++++++++++-----
1 file changed, 63 insertions(+), 8 deletions(-)
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 1d7839e4a0..ad0f79e03d 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -417,6 +417,14 @@ static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap,
.bus = vms->bus,
};
+ /*
+ * Nested SMMU requires RMRs for MSI 1-1 mapping, which
+ * require _DSM for PreservingPCI Boot Configurations
+ */
+ if (vms->iommu == VIRT_IOMMU_SMMUV3_NESTED) {
+ cfg.preserve_config = true;
+ }
+
if (vms->highmem_mmio) {
cfg.mmio64 = memmap[VIRT_HIGH_PCIE_MMIO];
}
@@ -495,7 +503,7 @@ static void acpi_dsdt_add_tpm(Aml *scope, VirtMachineState *vms)
#define IORT_NODE_OFFSET 48
static void build_iort_id_mapping(GArray *table_data, uint32_t input_base,
- uint32_t id_count, uint32_t out_ref)
+ uint32_t id_count, uint32_t out_ref, uint32_t flags)
{
/* Table 4 ID mapping format */
build_append_int_noprefix(table_data, input_base, 4); /* Input base */
@@ -503,7 +511,7 @@ static void build_iort_id_mapping(GArray *table_data, uint32_t input_base,
build_append_int_noprefix(table_data, input_base, 4); /* Output base */
build_append_int_noprefix(table_data, out_ref, 4); /* Output Reference */
/* Flags */
- build_append_int_noprefix(table_data, 0 /* Single mapping (disabled) */, 4);
+ build_append_int_noprefix(table_data, flags, 4); /* Flags */
}
struct AcpiIortIdMapping {
@@ -545,6 +553,50 @@ static int iort_idmap_compare(gconstpointer a, gconstpointer b)
return idmap_a->input_base - idmap_b->input_base;
}
+static void
+build_iort_rmr_nodes(GArray *table_data, GArray *smmu_idmaps,
+ size_t *smmu_offset, uint32_t *id)
+{
+ AcpiIortIdMapping *range;
+ int i;
+
+ for (i = 0; i < smmu_idmaps->len; i++) {
+ range = &g_array_index(smmu_idmaps, AcpiIortIdMapping, i);
+ int bdf = range->input_base;
+
+ /* Table 18 Reserved Memory Range Node */
+
+ build_append_int_noprefix(table_data, 6 /* RMR */, 1); /* Type */
+ /* Length */
+ build_append_int_noprefix(table_data, 28 + ID_MAPPING_ENTRY_SIZE + 20, 2);
+ build_append_int_noprefix(table_data, 3, 1); /* Revision */
+ build_append_int_noprefix(table_data, *id, 4); /* Identifier */
+ /* Number of ID mappings */
+ build_append_int_noprefix(table_data, 1, 4);
+ /* Reference to ID Array */
+ build_append_int_noprefix(table_data, 28, 4);
+
+ /* RMR specific data */
+
+ /* Flags */
+ build_append_int_noprefix(table_data, 0 /* Disallow remapping */, 4);
+ /* Number of Memory Range Descriptors */
+ build_append_int_noprefix(table_data, 1 , 4);
+ /* Reference to Memory Range Descriptors */
+ build_append_int_noprefix(table_data, 28 + ID_MAPPING_ENTRY_SIZE, 4);
+ build_iort_id_mapping(table_data, bdf, range->id_count, smmu_offset[i], 1);
+
+ /* Table 19 Memory Range Descriptor */
+
+ /* Physical Range offset */
+ build_append_int_noprefix(table_data, 0x8000000, 8);
+ /* Physical Range length */
+ build_append_int_noprefix(table_data, 0x100000, 8);
+ build_append_int_noprefix(table_data, 0, 4); /* Reserved */
+ *id += 1;
+ }
+}
+
/*
* Input Output Remapping Table (IORT)
* Conforms to "IO Remapping Table System Software on ARM Platforms",
@@ -554,7 +606,6 @@ static void
build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
{
int i, nb_nodes, rc_mapping_count;
- const uint32_t iort_node_offset = IORT_NODE_OFFSET;
size_t node_size, *smmu_offset;
AcpiIortIdMapping *idmap;
hwaddr base;
@@ -563,7 +614,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
GArray *smmu_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping));
GArray *its_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping));
- AcpiTable table = { .sig = "IORT", .rev = 3, .oem_id = vms->oem_id,
+ AcpiTable table = { .sig = "IORT", .rev = 5, .oem_id = vms->oem_id,
.oem_table_id = vms->oem_table_id };
/* Table 2 The IORT */
acpi_table_begin(&table, table_data);
@@ -668,7 +719,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
build_append_int_noprefix(table_data, 0, 4);
/* output IORT node is the ITS group node (the first node) */
- build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET);
+ build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET, 0);
}
/* Table 17 Root Complex Node */
@@ -709,7 +760,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
range = &g_array_index(smmu_idmaps, AcpiIortIdMapping, i);
/* output IORT node is the smmuv3 node */
build_iort_id_mapping(table_data, range->input_base,
- range->id_count, smmu_offset[i]);
+ range->id_count, smmu_offset[i], 0);
}
/* bypassed RIDs connect to ITS group node directly: RC -> ITS */
@@ -717,11 +768,15 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
range = &g_array_index(its_idmaps, AcpiIortIdMapping, i);
/* output IORT node is the ITS group node (the first node) */
build_iort_id_mapping(table_data, range->input_base,
- range->id_count, iort_node_offset);
+ range->id_count, IORT_NODE_OFFSET, 0);
}
} else {
/* output IORT node is the ITS group node (the first node) */
- build_iort_id_mapping(table_data, 0, 0xFFFF, IORT_NODE_OFFSET);
+ build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET, 0);
+ }
+
+ if (vms->iommu == VIRT_IOMMU_SMMUV3_NESTED) {
+ build_iort_rmr_nodes(table_data, smmu_idmaps, smmu_offset, &id);
}
acpi_table_end(linker, &table);
--
2.41.0.windows.1

View File

@ -0,0 +1,155 @@
From a7ffb5856940a1515ef84a4d4644b7c7c07afb8f Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Wed, 6 Nov 2024 19:22:13 +0000
Subject: [PATCH] hw/arm/virt-acpi-build: Build IORT with multiple SMMU nodes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Now that we can have multiple user-creatable smmuv3-nested
devices, each associated with different pci buses, update
IORT ID mappings accordingly.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
hw/arm/virt-acpi-build.c | 43 ++++++++++++++++++++++++++++------------
include/hw/arm/virt.h | 6 ++++++
2 files changed, 36 insertions(+), 13 deletions(-)
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 076781423b..1d7839e4a0 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -555,8 +555,10 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
{
int i, nb_nodes, rc_mapping_count;
const uint32_t iort_node_offset = IORT_NODE_OFFSET;
- size_t node_size, smmu_offset = 0;
+ size_t node_size, *smmu_offset;
AcpiIortIdMapping *idmap;
+ hwaddr base;
+ int irq, num_smmus = 0;
uint32_t id = 0;
GArray *smmu_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping));
GArray *its_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping));
@@ -566,7 +568,21 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
/* Table 2 The IORT */
acpi_table_begin(&table, table_data);
- if (vms->iommu == VIRT_IOMMU_SMMUV3) {
+ if (vms->smmu_nested_count) {
+ irq = vms->irqmap[VIRT_SMMU_NESTED] + ARM_SPI_BASE;
+ base = vms->memmap[VIRT_SMMU_NESTED].base;
+ num_smmus = vms->smmu_nested_count;
+ } else if (virt_has_smmuv3(vms)) {
+ irq = vms->irqmap[VIRT_SMMU] + ARM_SPI_BASE;
+ base = vms->memmap[VIRT_SMMU].base;
+ num_smmus = 1;
+ }
+
+ smmu_offset = g_new0(size_t, num_smmus);
+ nb_nodes = 2; /* RC, ITS */
+ nb_nodes += num_smmus; /* SMMU nodes */
+
+ if (virt_has_smmuv3(vms)) {
AcpiIortIdMapping next_range = {0};
object_child_foreach_recursive(object_get_root(),
@@ -588,18 +604,19 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
}
next_range.input_base = idmap->input_base + idmap->id_count;
+ if (vms->iommu == VIRT_IOMMU_SMMUV3_NESTED) {
+ nb_nodes++; /* RMR node per SMMU */
+ }
}
/* Append the last RC -> ITS ID mapping */
- if (next_range.input_base < 0xFFFF) {
- next_range.id_count = 0xFFFF - next_range.input_base;
+ if (next_range.input_base < 0x10000) {
+ next_range.id_count = 0x10000 - next_range.input_base;
g_array_append_val(its_idmaps, next_range);
}
- nb_nodes = 3; /* RC, ITS, SMMUv3 */
rc_mapping_count = smmu_idmaps->len + its_idmaps->len;
} else {
- nb_nodes = 2; /* RC, ITS */
rc_mapping_count = 1;
}
/* Number of IORT Nodes */
@@ -621,10 +638,9 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
/* GIC ITS Identifier Array */
build_append_int_noprefix(table_data, 0 /* MADT translation_id */, 4);
- if (vms->iommu == VIRT_IOMMU_SMMUV3) {
- int irq = vms->irqmap[VIRT_SMMU] + ARM_SPI_BASE;
+ for (i = 0; i < num_smmus; i++) {
+ smmu_offset[i] = table_data->len - table.table_offset;
- smmu_offset = table_data->len - table.table_offset;
/* Table 9 SMMUv3 Format */
build_append_int_noprefix(table_data, 4 /* SMMUv3 */, 1); /* Type */
node_size = SMMU_V3_ENTRY_SIZE + ID_MAPPING_ENTRY_SIZE;
@@ -635,7 +651,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
/* Reference to ID Array */
build_append_int_noprefix(table_data, SMMU_V3_ENTRY_SIZE, 4);
/* Base address */
- build_append_int_noprefix(table_data, vms->memmap[VIRT_SMMU].base, 8);
+ build_append_int_noprefix(table_data, base + (i * SMMU_IO_LEN), 8);
/* Flags */
build_append_int_noprefix(table_data, 1 /* COHACC Override */, 4);
build_append_int_noprefix(table_data, 0, 4); /* Reserved */
@@ -646,12 +662,13 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
build_append_int_noprefix(table_data, irq + 1, 4); /* PRI */
build_append_int_noprefix(table_data, irq + 3, 4); /* GERR */
build_append_int_noprefix(table_data, irq + 2, 4); /* Sync */
+ irq += NUM_SMMU_IRQS;
build_append_int_noprefix(table_data, 0, 4); /* Proximity domain */
/* DeviceID mapping index (ignored since interrupts are GSIV based) */
build_append_int_noprefix(table_data, 0, 4);
/* output IORT node is the ITS group node (the first node) */
- build_iort_id_mapping(table_data, 0, 0xFFFF, IORT_NODE_OFFSET);
+ build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET);
}
/* Table 17 Root Complex Node */
@@ -684,7 +701,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
build_append_int_noprefix(table_data, 0, 3); /* Reserved */
/* Output Reference */
- if (vms->iommu == VIRT_IOMMU_SMMUV3) {
+ if (virt_has_smmuv3(vms)) {
AcpiIortIdMapping *range;
/* translated RIDs connect to SMMUv3 node: RC -> SMMUv3 -> ITS */
@@ -692,7 +709,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
range = &g_array_index(smmu_idmaps, AcpiIortIdMapping, i);
/* output IORT node is the smmuv3 node */
build_iort_id_mapping(table_data, range->input_base,
- range->id_count, smmu_offset);
+ range->id_count, smmu_offset[i]);
}
/* bypassed RIDs connect to ITS group node directly: RC -> ITS */
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index cd41e28202..bc3c8b70da 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -295,4 +295,10 @@ static inline int virt_gicv3_redist_region_count(VirtMachineState *vms)
vms->highmem_redists) ? 2 : 1;
}
+static inline bool virt_has_smmuv3(const VirtMachineState *vms)
+{
+ return vms->iommu == VIRT_IOMMU_SMMUV3 ||
+ vms->iommu == VIRT_IOMMU_SMMUV3_NESTED;
+}
+
#endif /* QEMU_ARM_VIRT_H */
--
2.41.0.windows.1

View File

@ -0,0 +1,33 @@
From 5405fa36c5f2784a9a6b19ee60d44b6cffb9f769 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
Date: Sat, 11 Jan 2025 10:52:57 +0800
Subject: [PATCH] hw/i386: Activate IOMMUFD for q35 machines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Zhou Wang <wangzhou1@hisilicon.com>
---
hw/i386/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
index 682e324f1c..908f29e02b 100644
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -105,6 +105,7 @@ config Q35
imply E1000E_PCI_EXPRESS
imply VMPORT
imply VMMOUSE
+ imply IOMMUFD
select PC_PCI
select PC_ACPI
select PCI_EXPRESS_Q35
--
2.41.0.windows.1

View File

@ -0,0 +1,50 @@
From 7e1bd6e7e109c6228bc4c40ea6f2af2d7f281fca Mon Sep 17 00:00:00 2001
From: qihao_yewu <qihao_yewu@cmss.chinamobile.com>
Date: Tue, 8 Apr 2025 05:59:29 -0400
Subject: [PATCH] hw/misc/aspeed_hace: Fix buffer overflow in has_padding
function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
cherry-pick from 78877b2e06464f49f777e086845e094ea7bc82ef
The maximum padding size is either 64 or 128 bytes and should always be smaller
than "req_len". If "padding_size" exceeds "req_len", then
"req_len - padding_size" underflows due to "uint32_t" data type, leading to a
large incorrect value (e.g., `0xFFXXXXXX`). This causes an out-of-bounds memory
access, potentially leading to a buffer overflow.
Added a check to ensure "padding_size" does not exceed "req_len" before
computing "pad_offset". This prevents "req_len - padding_size" from underflowing
and avoids accessing invalid memory.
Signed-off-by: Jamin Lin <jamin_lin@aspeedtech.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Fixes: 5cd7d8564a8b563da724b9e6264c967f0a091afa ("aspeed/hace: Support AST2600 HACE ")
Link: https://lore.kernel.org/qemu-devel/20250321092623.2097234-3-jamin_lin@aspeedtech.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: qihao_yewu <qihao_yewu@cmss.chinamobile.com>
---
hw/misc/aspeed_hace.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/hw/misc/aspeed_hace.c b/hw/misc/aspeed_hace.c
index b07506ec04..8706e3d376 100644
--- a/hw/misc/aspeed_hace.c
+++ b/hw/misc/aspeed_hace.c
@@ -123,6 +123,11 @@ static bool has_padding(AspeedHACEState *s, struct iovec *iov,
if (*total_msg_len <= s->total_req_len) {
uint32_t padding_size = s->total_req_len - *total_msg_len;
uint8_t *padding = iov->iov_base;
+
+ if (padding_size > req_len) {
+ return false;
+ }
+
*pad_offset = req_len - padding_size;
if (padding[*pad_offset] == 0x80) {
return true;
--
2.41.0.windows.1

View File

@ -0,0 +1,36 @@
From 43fdaaa492ea10ab0e90ec4cc68ec45aed1d415c Mon Sep 17 00:00:00 2001
From: gubin <gubin_yewu@cmss.chinamobile.com>
Date: Sat, 22 Mar 2025 15:20:27 +0800
Subject: [PATCH] hw/nvme: fix invalid check on mcl
cherry-pick from 8c78015a55d84c016da6d5e41b6b5f618ecb25ab
The number of logical blocks within a source range is converted into a
1s based number at the time of parsing. However, when verifying the copy
length we add one again, causing the check against MCL to fail in error.
Cc: qemu-stable@nongnu.org
Fixes: 381ab99d8587 ("hw/nvme: check maximum copy length (MCL) for COPY")
Reviewed-by: Minwoo Im <minwoo.im@samsung.com>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Signed-off-by: gubin <gubin_yewu@cmss.chinamobile.com>
---
hw/nvme/ctrl.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 29445938d5..407004b2f7 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -2863,7 +2863,7 @@ static inline uint16_t nvme_check_copy_mcl(NvmeNamespace *ns,
uint32_t nlb;
nvme_copy_source_range_parse(iocb->ranges, idx, iocb->format, NULL,
&nlb, NULL, NULL, NULL);
- copy_len += nlb + 1;
+ copy_len += nlb;
}
if (copy_len > ns->id_ns.mcl) {
--
2.41.0.windows.1

View File

@ -0,0 +1,42 @@
From 6de964bac51139ef24f43bde56933cd8eafaf317 Mon Sep 17 00:00:00 2001
From: gubin <gubin_yewu@cmss.chinamobile.com>
Date: Sat, 22 Mar 2025 15:25:39 +0800
Subject: [PATCH] hw/nvme: fix invalid endian conversion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
cherry-pick from d2b5bb860e6c17442ad95cc275feb07c1665be5c
numcntl is one byte and so is max_vfs. Using cpu_to_le16 on big endian
hosts results in numcntl being set to 0.
Fix by dropping the endian conversion.
Fixes: 99f48ae7ae ("hw/nvme: Add support for Secondary Controller List")
Reported-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Minwoo Im <minwoo.im@samsung.com>
Message-ID: <20240222-fix-sriov-numcntl-v1-1-d60bea5e72d0@samsung.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: gubin <gubin_yewu@cmss.chinamobile.com>
---
hw/nvme/ctrl.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 29445938d5..9410344844 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -7928,7 +7928,7 @@ static void nvme_init_state(NvmeCtrl *n)
n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
QTAILQ_INIT(&n->aer_queue);
- list->numcntl = cpu_to_le16(max_vfs);
+ list->numcntl = max_vfs;
for (i = 0; i < max_vfs; i++) {
sctrl = &list->sec[i];
sctrl->pcid = cpu_to_le16(n->cntlid);
--
2.41.0.windows.1

View File

@ -0,0 +1,95 @@
From 03f9b12e33238587da36be24523911fd1b003324 Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
Date: Wed, 5 Jun 2024 16:30:38 +0800
Subject: [PATCH] hw/pci: Introduce helper function
pci_device_get_iommu_bus_devfn()
Extract out pci_device_get_iommu_bus_devfn() from
pci_device_iommu_address_space() to facilitate
implementation of pci_device_[set|unset]_iommu_device()
in following patch.
No functional change intended.
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/pci/pci.c | 48 +++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 45 insertions(+), 3 deletions(-)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 7467a2a9de..0884fbb760 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2681,11 +2681,27 @@ static void pci_device_class_base_init(ObjectClass *klass, void *data)
}
}
-AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
+/*
+ * Get IOMMU root bus, aliased bus and devfn of a PCI device
+ *
+ * IOMMU root bus is needed by all call sites to call into iommu_ops.
+ * For call sites which don't need aliased BDF, passing NULL to
+ * aliased_[bus|devfn] is allowed.
+ *
+ * @piommu_bus: return root #PCIBus backed by an IOMMU for the PCI device.
+ *
+ * @aliased_bus: return aliased #PCIBus of the PCI device, optional.
+ *
+ * @aliased_devfn: return aliased devfn of the PCI device, optional.
+ */
+static void pci_device_get_iommu_bus_devfn(PCIDevice *dev,
+ PCIBus **piommu_bus,
+ PCIBus **aliased_bus,
+ int *aliased_devfn)
{
PCIBus *bus = pci_get_bus(dev);
PCIBus *iommu_bus = bus;
- uint8_t devfn = dev->devfn;
+ int devfn = dev->devfn;
while (iommu_bus && !iommu_bus->iommu_ops && iommu_bus->parent_dev) {
PCIBus *parent_bus = pci_get_bus(iommu_bus->parent_dev);
@@ -2726,7 +2742,33 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
iommu_bus = parent_bus;
}
- if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) {
+
+ assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
+ assert(iommu_bus);
+
+ if (pci_bus_bypass_iommu(bus) || !iommu_bus->iommu_ops) {
+ iommu_bus = NULL;
+ }
+
+ *piommu_bus = iommu_bus;
+
+ if (aliased_bus) {
+ *aliased_bus = bus;
+ }
+
+ if (aliased_devfn) {
+ *aliased_devfn = devfn;
+ }
+}
+
+AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn);
+ if (iommu_bus) {
return iommu_bus->iommu_ops->get_address_space(bus,
iommu_bus->iommu_opaque, devfn);
}
--
2.41.0.windows.1

View File

@ -0,0 +1,120 @@
From 7bc73d38984460315df315d007789f87f4d11994 Mon Sep 17 00:00:00 2001
From: Yi Liu <yi.l.liu@intel.com>
Date: Wed, 5 Jun 2024 16:30:39 +0800
Subject: [PATCH] hw/pci: Introduce pci_device_[set|unset]_iommu_device()
pci_device_[set|unset]_iommu_device() call pci_device_get_iommu_bus_devfn()
to get iommu_bus->iommu_ops and call [set|unset]_iommu_device callback to
set/unset HostIOMMUDevice for a given PCI device.
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/pci/pci.c | 27 +++++++++++++++++++++++++++
include/hw/pci/pci.h | 38 +++++++++++++++++++++++++++++++++++++-
2 files changed, 64 insertions(+), 1 deletion(-)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 0884fbb760..d6f627aa51 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2775,6 +2775,33 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
return &address_space_memory;
}
+bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod,
+ Error **errp)
+{
+ PCIBus *iommu_bus;
+
+ /* set_iommu_device requires device's direct BDF instead of aliased BDF */
+ pci_device_get_iommu_bus_devfn(dev, &iommu_bus, NULL, NULL);
+ if (iommu_bus && iommu_bus->iommu_ops->set_iommu_device) {
+ return iommu_bus->iommu_ops->set_iommu_device(pci_get_bus(dev),
+ iommu_bus->iommu_opaque,
+ dev->devfn, hiod, errp);
+ }
+ return true;
+}
+
+void pci_device_unset_iommu_device(PCIDevice *dev)
+{
+ PCIBus *iommu_bus;
+
+ pci_device_get_iommu_bus_devfn(dev, &iommu_bus, NULL, NULL);
+ if (iommu_bus && iommu_bus->iommu_ops->unset_iommu_device) {
+ return iommu_bus->iommu_ops->unset_iommu_device(pci_get_bus(dev),
+ iommu_bus->iommu_opaque,
+ dev->devfn);
+ }
+}
+
void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
{
/*
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index cee0cf7460..8d1af44249 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -3,6 +3,7 @@
#include "exec/memory.h"
#include "sysemu/dma.h"
+#include "sysemu/host_iommu_device.h"
/* PCI includes legacy ISA access. */
#include "hw/isa/isa.h"
@@ -384,10 +385,45 @@ typedef struct PCIIOMMUOps {
*
* @devfn: device and function number
*/
- AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn);
+ AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn);
+ /**
+ * @set_iommu_device: attach a HostIOMMUDevice to a vIOMMU
+ *
+ * Optional callback, if not implemented in vIOMMU, then vIOMMU can't
+ * retrieve host information from the associated HostIOMMUDevice.
+ *
+ * @bus: the #PCIBus of the PCI device.
+ *
+ * @opaque: the data passed to pci_setup_iommu().
+ *
+ * @devfn: device and function number of the PCI device.
+ *
+ * @dev: the #HostIOMMUDevice to attach.
+ *
+ * @errp: pass an Error out only when return false
+ *
+ * Returns: true if HostIOMMUDevice is attached or else false with errp set.
+ */
+ bool (*set_iommu_device)(PCIBus *bus, void *opaque, int devfn,
+ HostIOMMUDevice *dev, Error **errp);
+ /**
+ * @unset_iommu_device: detach a HostIOMMUDevice from a vIOMMU
+ *
+ * Optional callback.
+ *
+ * @bus: the #PCIBus of the PCI device.
+ *
+ * @opaque: the data passed to pci_setup_iommu().
+ *
+ * @devfn: device and function number of the PCI device.
+ */
+ void (*unset_iommu_device)(PCIBus *bus, void *opaque, int devfn);
} PCIIOMMUOps;
AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
+bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod,
+ Error **errp);
+void pci_device_unset_iommu_device(PCIDevice *dev);
/**
* pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus
--
2.41.0.windows.1

View File

@ -0,0 +1,41 @@
From c1f1346eea8da6552e085aa13630bbf5227db00f Mon Sep 17 00:00:00 2001
From: qihao_yewu <qihao_yewu@cmss.chinamobile.com>
Date: Mon, 7 Apr 2025 12:54:10 -0400
Subject: [PATCH] hw/pci-host/designware: Fix ATU_UPPER_TARGET register access
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
cherry-pick from 04e99f9eb7920b0f0fcce65686c3bedf5e32a1f9
Fix copy/paste error writing to the ATU_UPPER_TARGET
register, we want to update the upper 32 bits.
Cc: qemu-stable@nongnu.org
Reported-by: Joey <jeundery@gmail.com>
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2861
Fixes: d64e5eabc4c ("pci: Add support for Designware IP block")
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Gustavo Romero <gustavo.romero@linaro.org>
Message-Id: <20250331152041.74533-2-philmd@linaro.org>
Signed-off-by: qihao_yewu <qihao_yewu@cmss.chinamobile.com>
---
hw/pci-host/designware.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/hw/pci-host/designware.c b/hw/pci-host/designware.c
index f477f97847..004142709c 100644
--- a/hw/pci-host/designware.c
+++ b/hw/pci-host/designware.c
@@ -360,7 +360,7 @@ static void designware_pcie_root_config_write(PCIDevice *d, uint32_t address,
case DESIGNWARE_PCIE_ATU_UPPER_TARGET:
viewport->target &= 0x00000000FFFFFFFFULL;
- viewport->target |= val;
+ viewport->target |= (uint64_t)val << 32;
break;
case DESIGNWARE_PCIE_ATU_LIMIT:
--
2.41.0.windows.1

View File

@ -0,0 +1,119 @@
From 37308e60d43323c0ea65d734487ce6542f8a9d3b Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Tue, 5 Oct 2021 10:53:12 +0200
Subject: [PATCH] hw/pci-host/gpex: [needs kernel fix] Allow to generate
preserve boot config DSM #5
Add a 'preserve_config' field in struct GPEXConfig and
if set, generate the DSM #5 for preserving PCI boot configurations.
The DSM presence is needed to expose RMRs.
At the moment the DSM generation is not yet enabled.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
hw/pci-host/gpex-acpi.c | 35 +++++++++++++++++++++++++++++++----
include/hw/pci-host/gpex.h | 1 +
2 files changed, 32 insertions(+), 4 deletions(-)
diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c
index ac5d229757..ce424fc9da 100644
--- a/hw/pci-host/gpex-acpi.c
+++ b/hw/pci-host/gpex-acpi.c
@@ -49,9 +49,10 @@ static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq)
}
}
-static void acpi_dsdt_add_pci_osc(Aml *dev)
+static void acpi_dsdt_add_pci_osc(Aml *dev, bool preserve_config)
{
Aml *method, *UUID, *ifctx, *ifctx1, *elsectx, *buf;
+ uint8_t byte_list[1] = {0};
/* Declare an _OSC (OS Control Handoff) method */
aml_append(dev, aml_name_decl("SUPP", aml_int(0)));
@@ -113,10 +114,24 @@ static void acpi_dsdt_add_pci_osc(Aml *dev)
UUID = aml_touuid("E5C937D0-3553-4D7A-9117-EA4D19C3434D");
ifctx = aml_if(aml_equal(aml_arg(0), UUID));
ifctx1 = aml_if(aml_equal(aml_arg(2), aml_int(0)));
- uint8_t byte_list[1] = {0};
+ if (preserve_config) {
+ /* bit 0: functions other than function 0 exist; bit 5: function 5 */
+ byte_list[0] = 0x21;
+ }
buf = aml_buffer(1, byte_list);
aml_append(ifctx1, aml_return(buf));
aml_append(ifctx, ifctx1);
+
+ if (preserve_config) {
+ Aml *ifctx2 = aml_if(aml_equal(aml_arg(2), aml_int(5)));
+ /*
+ * 0 - The operating system must not ignore the PCI configuration that
+ * firmware has done at boot time.
+ */
+ aml_append(ifctx2, aml_return(aml_int(0)));
+ aml_append(ifctx, ifctx2);
+ }
+
aml_append(method, ifctx);
byte_list[0] = 0;
@@ -174,6 +189,12 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg)
aml_append(dev, aml_name_decl("_PXM", aml_int(numa_node)));
}
+ if (cfg->preserve_config) {
+ method = aml_method("_DSM", 5, AML_SERIALIZED);
+ aml_append(method, aml_return(aml_int(0)));
+ aml_append(dev, method);
+ }
+
acpi_dsdt_add_pci_route_table(dev, cfg->irq);
/*
@@ -188,7 +209,7 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg)
if (is_cxl) {
build_cxl_osc_method(dev);
} else {
- acpi_dsdt_add_pci_osc(dev);
+ acpi_dsdt_add_pci_osc(dev, cfg->preserve_config);
}
aml_append(scope, dev);
@@ -205,6 +226,12 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg)
aml_append(dev, aml_name_decl("_STR", aml_unicode("PCIe 0 Device")));
aml_append(dev, aml_name_decl("_CCA", aml_int(1)));
+ if (cfg->preserve_config) {
+ method = aml_method("_DSM", 5, AML_SERIALIZED);
+ aml_append(method, aml_return(aml_int(0)));
+ aml_append(dev, method);
+ }
+
acpi_dsdt_add_pci_route_table(dev, cfg->irq);
method = aml_method("_CBA", 0, AML_NOTSERIALIZED);
@@ -263,7 +290,7 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg)
}
aml_append(dev, aml_name_decl("_CRS", rbuf));
- acpi_dsdt_add_pci_osc(dev);
+ acpi_dsdt_add_pci_osc(dev, cfg->preserve_config);
Aml *dev_res0 = aml_device("%s", "RES0");
aml_append(dev_res0, aml_name_decl("_HID", aml_string("PNP0C02")));
diff --git a/include/hw/pci-host/gpex.h b/include/hw/pci-host/gpex.h
index b0240bd768..65475f7f9d 100644
--- a/include/hw/pci-host/gpex.h
+++ b/include/hw/pci-host/gpex.h
@@ -64,6 +64,7 @@ struct GPEXConfig {
MemMapEntry pio;
int irq;
PCIBus *bus;
+ bool preserve_config;
};
int gpex_set_irq_num(GPEXHost *s, int index, int gsi);
--
2.41.0.windows.1

View File

@ -0,0 +1,46 @@
From 3746a434596b9bc20994c869c79fb9db24227418 Mon Sep 17 00:00:00 2001
From: qihao_yewu <qihao_yewu@cmss.chinamobile.com>
Date: Mon, 7 Apr 2025 13:56:18 -0400
Subject: [PATCH] hw/sd/sdhci: free irq on exit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
cherry-pick from 1c2d03bb0889b7a9a677d53126fb035190683af4
Fix a memory leak bug in sdhci_pci_realize() due to s->irq
not being freed in sdhci_pci_exit().
Signed-off-by: Zheng Huang <hz1624917200@gmail.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <09ddf42b-a6db-42d5-954b-148d09d8d6cc@gmail.com>
[PMD: Moved qemu_free_irq() call before sdhci_common_unrealize()]
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: qihao_yewu <qihao_yewu@cmss.chinamobile.com>
---
hw/sd/sdhci-pci.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/hw/sd/sdhci-pci.c b/hw/sd/sdhci-pci.c
index 9b7bee8b3f..c1eb67cf29 100644
--- a/hw/sd/sdhci-pci.c
+++ b/hw/sd/sdhci-pci.c
@@ -18,6 +18,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/module.h"
+#include "hw/irq.h"
#include "hw/qdev-properties.h"
#include "hw/sd/sdhci.h"
#include "sdhci-internal.h"
@@ -49,6 +50,7 @@ static void sdhci_pci_exit(PCIDevice *dev)
{
SDHCIState *s = PCI_SDHCI(dev);
+ qemu_free_irq(s->irq);
sdhci_common_unrealize(s);
sdhci_uninitfn(s);
}
--
2.41.0.windows.1

View File

@ -0,0 +1,46 @@
From 068fef175047c18f60900dacd54c7a436114c164 Mon Sep 17 00:00:00 2001
From: qihao_yewu <qihao_yewu@cmss.chinamobile.com>
Date: Mon, 7 Apr 2025 13:18:47 -0400
Subject: [PATCH] hw/ufs: free irq on exit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
cherry-pick from c458f9474d6574505ce9144ab1a90b951e69c1bd
Fix a memory leak bug in ufs_init_pci() due to u->irq
not being freed in ufs_exit().
Signed-off-by: Zheng Huang <hz1624917200@gmail.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <43ceb427-87aa-44ee-9007-dbaecc499bba@gmail.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: qihao_yewu <qihao_yewu@cmss.chinamobile.com>
---
hw/ufs/ufs.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/hw/ufs/ufs.c b/hw/ufs/ufs.c
index 068895b27b..f57d33e771 100644
--- a/hw/ufs/ufs.c
+++ b/hw/ufs/ufs.c
@@ -25,6 +25,7 @@
#include "qapi/error.h"
#include "migration/vmstate.h"
#include "scsi/constants.h"
+#include "hw/irq.h"
#include "trace.h"
#include "ufs.h"
@@ -1286,6 +1287,8 @@ static void ufs_exit(PCIDevice *pci_dev)
{
UfsHc *u = UFS(pci_dev);
+ qemu_free_irq(u->irq);
+
qemu_bh_delete(u->doorbell_bh);
qemu_bh_delete(u->complete_bh);
--
2.41.0.windows.1

View File

@ -0,0 +1,43 @@
From 5eb0bb1f8ce9835b368e78d414ff6136c77ef94b Mon Sep 17 00:00:00 2001
From: qihao_yewu <qihao_yewu@cmss.chinamobile.com>
Date: Tue, 8 Apr 2025 06:51:26 -0400
Subject: [PATCH] hw/xen: Fix xen_bus_realize() error handling
cherry-pick from de7b18083bfed4e1a01bb40b4ad050c47d2011fa
The Error ** argument must be NULL, &error_abort, &error_fatal, or a
pointer to a variable containing NULL. Passing an argument of the
latter kind twice without clearing it in between is wrong: if the
first call sets an error, it no longer points to NULL for the second
call.
xen_bus_realize() is wrong that way: it passes &local_err to
xs_node_watch() in a loop. If this fails in more than one iteration,
it can trip error_setv()'s assertion.
Fix by clearing @local_err.
Fixes: c4583c8c394e (xen-bus: reduce scope of backend watch)
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-ID: <20250314143500.2449658-2-armbru@redhat.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Signed-off-by: qihao_yewu <qihao_yewu@cmss.chinamobile.com>
---
hw/xen/xen-bus.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
index 4973e7d9c9..c10b089914 100644
--- a/hw/xen/xen-bus.c
+++ b/hw/xen/xen-bus.c
@@ -352,6 +352,7 @@ static void xen_bus_realize(BusState *bus, Error **errp)
error_reportf_err(local_err,
"failed to set up '%s' enumeration watch: ",
type[i]);
+ local_err = NULL;
}
g_free(node);
--
2.41.0.windows.1

View File

@ -0,0 +1,38 @@
From 0d5ac4f36208eadbb922f552ba1b762f5bd0c3a6 Mon Sep 17 00:00:00 2001
From: Xiaoyao Li <xiaoyao.li@intel.com>
Date: Wed, 24 Jan 2024 21:40:15 -0500
Subject: [PATCH] i386/cpuid: Remove subleaf constraint on CPUID leaf 1F
commit a3b5376521a0de898440e8d0942b54e628f0949f upstream.
No such constraint that subleaf index needs to be less than 64.
Intel-SIG: commit a3b5376521a0 i386/cpuid: Remove subleaf constraint on CPUID leaf 1F
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Yang Weijiang <weijiang.yang@intel.com>
Message-ID: <20240125024016.2521244-3-xiaoyao.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
---
target/i386/kvm/kvm.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index ce96ed9158..850104f6b5 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -1928,10 +1928,6 @@ int kvm_arch_init_vcpu(CPUState *cs)
break;
}
- if (i == 0x1f && j == 64) {
- break;
- }
-
c->function = i;
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
c->index = j;
--
2.41.0.windows.1

View File

@ -0,0 +1,70 @@
From 4ef1b086272552378c09356b0e9fd2548a27a621 Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
Date: Wed, 5 Jun 2024 16:30:43 +0800
Subject: [PATCH] intel_iommu: Check compatibility with host IOMMU capabilities
If check fails, host device (either VFIO or VDPA device) is not
compatible with current vIOMMU config and should not be passed to
guest.
Only aw_bits is checked for now, we don't care about other caps
before scalable modern mode is introduced.
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/i386/intel_iommu.c | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index bdc14f8438..60d86e0cb6 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3838,6 +3838,30 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
return vtd_dev_as;
}
+static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod,
+ Error **errp)
+{
+ HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
+ int ret;
+
+ if (!hiodc->get_cap) {
+ error_setg(errp, ".get_cap() not implemented");
+ return false;
+ }
+
+ /* Common checks */
+ ret = hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_AW_BITS, errp);
+ if (ret < 0) {
+ return false;
+ }
+ if (s->aw_bits > ret) {
+ error_setg(errp, "aw-bits %d > host aw-bits %d", s->aw_bits, ret);
+ return false;
+ }
+
+ return true;
+}
+
static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
HostIOMMUDevice *hiod, Error **errp)
{
@@ -3858,6 +3882,11 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
return false;
}
+ if (!vtd_check_hiod(s, hiod, errp)) {
+ vtd_iommu_unlock(s);
+ return false;
+ }
+
new_key = g_malloc(sizeof(*new_key));
new_key->bus = bus;
new_key->devfn = devfn;
--
2.41.0.windows.1

View File

@ -0,0 +1,142 @@
From a051e4349316d7065c9418de691787edae8e7f4e Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
Date: Wed, 5 Jun 2024 16:30:41 +0800
Subject: [PATCH] intel_iommu: Extract out vtd_cap_init() to initialize
cap/ecap
Extract cap/ecap initialization in vtd_cap_init() to make code
cleaner.
No functional change intended.
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/i386/intel_iommu.c | 93 ++++++++++++++++++++++++-------------------
1 file changed, 51 insertions(+), 42 deletions(-)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 3da56e439e..6716407b7a 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3935,30 +3935,10 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
return;
}
-/* Do the initialization. It will also be called when reset, so pay
- * attention when adding new initialization stuff.
- */
-static void vtd_init(IntelIOMMUState *s)
+static void vtd_cap_init(IntelIOMMUState *s)
{
X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
- memset(s->csr, 0, DMAR_REG_SIZE);
- memset(s->wmask, 0, DMAR_REG_SIZE);
- memset(s->w1cmask, 0, DMAR_REG_SIZE);
- memset(s->womask, 0, DMAR_REG_SIZE);
-
- s->root = 0;
- s->root_scalable = false;
- s->dmar_enabled = false;
- s->intr_enabled = false;
- s->iq_head = 0;
- s->iq_tail = 0;
- s->iq = 0;
- s->iq_size = 0;
- s->qi_enabled = false;
- s->iq_last_desc_type = VTD_INV_DESC_NONE;
- s->iq_dw = false;
- s->next_frcd_reg = 0;
s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
VTD_CAP_MGAW(s->aw_bits);
@@ -3975,27 +3955,6 @@ static void vtd_init(IntelIOMMUState *s)
}
s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
- /*
- * Rsvd field masks for spte
- */
- vtd_spte_rsvd[0] = ~0ULL;
- vtd_spte_rsvd[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits,
- x86_iommu->dt_supported);
- vtd_spte_rsvd[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits);
- vtd_spte_rsvd[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits);
- vtd_spte_rsvd[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits);
-
- vtd_spte_rsvd_large[2] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits,
- x86_iommu->dt_supported);
- vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits,
- x86_iommu->dt_supported);
-
- if (s->scalable_mode || s->snoop_control) {
- vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP;
- vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP;
- vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP;
- }
-
if (x86_iommu_ir_supported(x86_iommu)) {
s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV;
if (s->intr_eim == ON_OFF_AUTO_ON) {
@@ -4028,6 +3987,56 @@ static void vtd_init(IntelIOMMUState *s)
if (s->pasid) {
s->ecap |= VTD_ECAP_PASID;
}
+}
+
+/*
+ * Do the initialization. It will also be called when reset, so pay
+ * attention when adding new initialization stuff.
+ */
+static void vtd_init(IntelIOMMUState *s)
+{
+ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
+
+ memset(s->csr, 0, DMAR_REG_SIZE);
+ memset(s->wmask, 0, DMAR_REG_SIZE);
+ memset(s->w1cmask, 0, DMAR_REG_SIZE);
+ memset(s->womask, 0, DMAR_REG_SIZE);
+
+ s->root = 0;
+ s->root_scalable = false;
+ s->dmar_enabled = false;
+ s->intr_enabled = false;
+ s->iq_head = 0;
+ s->iq_tail = 0;
+ s->iq = 0;
+ s->iq_size = 0;
+ s->qi_enabled = false;
+ s->iq_last_desc_type = VTD_INV_DESC_NONE;
+ s->iq_dw = false;
+ s->next_frcd_reg = 0;
+
+ vtd_cap_init(s);
+
+ /*
+ * Rsvd field masks for spte
+ */
+ vtd_spte_rsvd[0] = ~0ULL;
+ vtd_spte_rsvd[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits,
+ x86_iommu->dt_supported);
+ vtd_spte_rsvd[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits);
+ vtd_spte_rsvd[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits);
+ vtd_spte_rsvd[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits);
+
+ vtd_spte_rsvd_large[2] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits,
+ x86_iommu->dt_supported);
+ vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits,
+ x86_iommu->dt_supported);
+
+ if (s->scalable_mode || s->snoop_control) {
+ vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP;
+ vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP;
+ vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP;
+ }
vtd_reset_caches(s);
--
2.41.0.windows.1

View File

@ -0,0 +1,160 @@
From 5834bb1ccce592380a91a5cf127f90a031cd7cf2 Mon Sep 17 00:00:00 2001
From: Yi Liu <yi.l.liu@intel.com>
Date: Wed, 5 Jun 2024 16:30:42 +0800
Subject: [PATCH] intel_iommu: Implement [set|unset]_iommu_device() callbacks
Implement [set|unset]_iommu_device() callbacks in Intel vIOMMU.
In set call, we take a reference of HostIOMMUDevice and store it
in hash table indexed by PCI BDF.
Note this BDF index is device's real BDF not the aliased one which
is different from the index of VTDAddressSpace. There can be multiple
assigned devices under same virtual iommu group and share same
VTDAddressSpace, but each has its own HostIOMMUDevice.
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/i386/intel_iommu.c | 81 +++++++++++++++++++++++++++++++++++
include/hw/i386/intel_iommu.h | 2 +
2 files changed, 83 insertions(+)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 6716407b7a..bdc14f8438 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -61,6 +61,12 @@ struct vtd_as_key {
uint32_t pasid;
};
+/* bus/devfn is PCI device's real BDF not the aliased one */
+struct vtd_hiod_key {
+ PCIBus *bus;
+ uint8_t devfn;
+};
+
struct vtd_iotlb_key {
uint64_t gfn;
uint32_t pasid;
@@ -250,6 +256,25 @@ static guint vtd_as_hash(gconstpointer v)
return (guint)(value << 8 | key->devfn);
}
+/* Same implementation as vtd_as_hash() */
+static guint vtd_hiod_hash(gconstpointer v)
+{
+ return vtd_as_hash(v);
+}
+
+static gboolean vtd_hiod_equal(gconstpointer v1, gconstpointer v2)
+{
+ const struct vtd_hiod_key *key1 = v1;
+ const struct vtd_hiod_key *key2 = v2;
+
+ return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
+}
+
+static void vtd_hiod_destroy(gpointer v)
+{
+ object_unref(v);
+}
+
static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
gpointer user_data)
{
@@ -3813,6 +3838,58 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
return vtd_dev_as;
}
+static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
+ HostIOMMUDevice *hiod, Error **errp)
+{
+ IntelIOMMUState *s = opaque;
+ struct vtd_as_key key = {
+ .bus = bus,
+ .devfn = devfn,
+ };
+ struct vtd_as_key *new_key;
+
+ assert(hiod);
+
+ vtd_iommu_lock(s);
+
+ if (g_hash_table_lookup(s->vtd_host_iommu_dev, &key)) {
+ error_setg(errp, "Host IOMMU device already exist");
+ vtd_iommu_unlock(s);
+ return false;
+ }
+
+ new_key = g_malloc(sizeof(*new_key));
+ new_key->bus = bus;
+ new_key->devfn = devfn;
+
+ object_ref(hiod);
+ g_hash_table_insert(s->vtd_host_iommu_dev, new_key, hiod);
+
+ vtd_iommu_unlock(s);
+
+ return true;
+}
+
+static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int devfn)
+{
+ IntelIOMMUState *s = opaque;
+ struct vtd_as_key key = {
+ .bus = bus,
+ .devfn = devfn,
+ };
+
+ vtd_iommu_lock(s);
+
+ if (!g_hash_table_lookup(s->vtd_host_iommu_dev, &key)) {
+ vtd_iommu_unlock(s);
+ return;
+ }
+
+ g_hash_table_remove(s->vtd_host_iommu_dev, &key);
+
+ vtd_iommu_unlock(s);
+}
+
/* Unmap the whole range in the notifier's scope. */
static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
{
@@ -4117,6 +4194,8 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
static PCIIOMMUOps vtd_iommu_ops = {
.get_address_space = vtd_host_dma_iommu,
+ .set_iommu_device = vtd_dev_set_iommu_device,
+ .unset_iommu_device = vtd_dev_unset_iommu_device,
};
static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
@@ -4240,6 +4319,8 @@ static void vtd_realize(DeviceState *dev, Error **errp)
g_free, g_free);
s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal,
g_free, g_free);
+ s->vtd_host_iommu_dev = g_hash_table_new_full(vtd_hiod_hash, vtd_hiod_equal,
+ g_free, vtd_hiod_destroy);
vtd_init(s);
pci_setup_iommu(bus, &vtd_iommu_ops, dev);
/* Pseudo address space under root PCI bus. */
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 7fa0a695c8..1eb05c29fc 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -292,6 +292,8 @@ struct IntelIOMMUState {
/* list of registered notifiers */
QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
+ GHashTable *vtd_host_iommu_dev; /* HostIOMMUDevice */
+
/* interrupt remapping */
bool intr_enabled; /* Whether guest enabled IR */
dma_addr_t intr_root; /* Interrupt remapping table pointer */
--
2.41.0.windows.1

View File

@ -0,0 +1,90 @@
From 8414bc02f988ecca7dda5325227ff5ffbe45150c Mon Sep 17 00:00:00 2001
From: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Date: Wed, 15 Jan 2025 10:02:58 +0000
Subject: [PATCH] iommufd.h: Updated to openeuler olk-6.6 kernel
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
linux-headers/linux/iommufd.h | 26 ++++++++++++--------------
1 file changed, 12 insertions(+), 14 deletions(-)
diff --git a/linux-headers/linux/iommufd.h b/linux-headers/linux/iommufd.h
index 41559c6064..3e57fee01c 100644
--- a/linux-headers/linux/iommufd.h
+++ b/linux-headers/linux/iommufd.h
@@ -51,8 +51,8 @@ enum {
IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c,
IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
- IOMMUFD_CMD_VIOMMU_ALLOC = 0x8f,
- IOMMUFD_CMD_VDEVICE_ALLOC = 0x90,
+ IOMMUFD_CMD_VIOMMU_ALLOC = 0x90,
+ IOMMUFD_CMD_VDEVICE_ALLOC = 0x91,
};
/**
@@ -397,18 +397,20 @@ struct iommu_hwpt_vtd_s1 {
};
/**
- * struct iommu_hwpt_arm_smmuv3 - ARM SMMUv3 Context Descriptor Table info
+ * struct iommu_hwpt_arm_smmuv3 - ARM SMMUv3 nested STE
* (IOMMU_HWPT_DATA_ARM_SMMUV3)
*
* @ste: The first two double words of the user space Stream Table Entry for
- * a user stage-1 Context Descriptor Table. Must be little-endian.
+ * the translation. Must be little-endian.
* Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec)
* - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax
* - word-1: EATS, S1DSS, S1CIR, S1COR, S1CSH, S1STALLD
*
* -EIO will be returned if @ste is not legal or contains any non-allowed field.
* Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass
- * nested domain will translate the same as the nesting parent.
+ * nested domain will translate the same as the nesting parent. The S1 will
+ * install a Context Descriptor Table pointing at userspace memory translated
+ * by the nesting parent.
*/
struct iommu_hwpt_arm_smmuv3 {
__aligned_le64 ste[2];
@@ -920,8 +922,8 @@ enum iommu_viommu_type {
* that is unique to a specific VM. Operations global to the IOMMU are connected
* to the vIOMMU, such as:
* - Security namespace for guest owned ID, e.g. guest-controlled cache tags
+ * - Non-device-affiliated event reporting, e.g. invalidation queue errors
* - Access to a sharable nesting parent pagetable across physical IOMMUs
- * - Non-affiliated event reporting (e.g. an invalidation queue error)
* - Virtualization of various platforms IDs, e.g. RIDs and others
* - Delivery of paravirtualized invalidation
* - Direct assigned invalidation queues
@@ -941,12 +943,10 @@ struct iommu_viommu_alloc {
* struct iommu_vdevice_alloc - ioctl(IOMMU_VDEVICE_ALLOC)
* @size: sizeof(struct iommu_vdevice_alloc)
* @viommu_id: vIOMMU ID to associate with the virtual device
- * @dev_id: The pyhsical device to allocate a virtual instance on the vIOMMU
- * @__reserved: Must be 0
+ * @dev_id: The physical device to allocate a virtual instance on the vIOMMU
+ * @out_vdevice_id: Object handle for the vDevice. Pass to IOMMU_DESTORY
* @virt_id: Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID
- * of AMD IOMMU, and vID of a nested Intel VT-d to a Context Table.
- * @out_vdevice_id: Output virtual instance ID for the allocated object
- * @__reserved2: Must be 0
+ * of AMD IOMMU, and vRID of a nested Intel VT-d to a Context Table
*
* Allocate a virtual device instance (for a physical device) against a vIOMMU.
* This instance holds the device's information (related to its vIOMMU) in a VM.
@@ -955,10 +955,8 @@ struct iommu_vdevice_alloc {
__u32 size;
__u32 viommu_id;
__u32 dev_id;
- __u32 __reserved;
- __aligned_u64 virt_id;
__u32 out_vdevice_id;
- __u32 __reserved2;
+ __aligned_u64 virt_id;
};
#define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC)
#endif
--
2.41.0.windows.1

View File

@ -0,0 +1,34 @@
From 3dfc0dd0b59925d1b73ca1a0db6d307ae597f76e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
Date: Sat, 11 Jan 2025 10:52:56 +0800
Subject: [PATCH] kconfig: Activate IOMMUFD for s390x machines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Reviewed-by: Eric Farman <farman@linux.ibm.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Zhou Wang <wangzhou1@hisilicon.com>
---
hw/s390x/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/hw/s390x/Kconfig b/hw/s390x/Kconfig
index 4c068d7960..26ad104485 100644
--- a/hw/s390x/Kconfig
+++ b/hw/s390x/Kconfig
@@ -6,6 +6,7 @@ config S390_CCW_VIRTIO
imply VFIO_CCW
imply WDT_DIAG288
imply PCIE_DEVICES
+ imply IOMMUFD
select PCI_EXPRESS
select S390_FLIC
select S390_FLIC_KVM if KVM
--
2.41.0.windows.1

View File

@ -0,0 +1,54 @@
From cdd5c088ff46ebf423c926fe4c0b12e345ae0db0 Mon Sep 17 00:00:00 2001
From: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Date: Thu, 23 Feb 2023 12:12:48 +0000
Subject: [PATCH] =?UTF-8?q?kvm:=20Translate=20MSI=20doorbell=20address?=
=?UTF-8?q?=C2=A0only=20if=20it=20is=20valid?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Guest might have already set the MSI doorbell address to invalid
and if we try to translate the address again, Guest reports,
[ 26.784082] arm-smmu-v3 arm-smmu-v3.0.auto: event 0x10 received:
[ 26.784088] arm-smmu-v3 arm-smmu-v3.0.auto: 0x0000001000000010
[ 26.784090] arm-smmu-v3 arm-smmu-v3.0.auto: 0x0000000000000000
[ 26.784092] arm-smmu-v3 arm-smmu-v3.0.auto: 0x0000000000000000
[ 26.784094] arm-smmu-v3 arm-smmu-v3.0.auto: 0x0000000000000000
[ 26.788082] arm-smmu-v3 arm-smmu-v3.0.auto: event 0x10 received:
[ 26.788085] arm-smmu-v3 arm-smmu-v3.0.auto: 0x0000001000000010
[ 26.788087] arm-smmu-v3 arm-smmu-v3.0.auto: 0x0000000000000000
....
eg: rmmod hisi_zip.ko. The sequence seems to be,
- Write 0 to MSI Message Address register
- Disable MSI
Hence check for address validity before we try to do the translation.
Note: The fix is placed in generic code and hopefully is not a problem
for other architectures.
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
accel/kvm/kvm-all.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index a8e29f148e..6fa97d2cbf 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2074,7 +2074,8 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg,
kroute.flags = KVM_MSI_VALID_DEVID;
kroute.u.msi.devid = pci_requester_id(dev);
}
- if (kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data, dev)) {
+ if (msg.address &&
+ kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data, dev)) {
return -EINVAL;
}
--
2.41.0.windows.1

View File

@ -0,0 +1,76 @@
From 17835e803d0cfa308cd00f070c7e21b27f3d036e Mon Sep 17 00:00:00 2001
From: gubin <gubin_yewu@cmss.chinamobile.com>
Date: Sat, 22 Mar 2025 15:38:09 +0800
Subject: [PATCH] net: fix build when libbpf is disabled, but libxdp is enabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
cherry-pick from 1f37280b37dbf85f36748f359a9f8802c8fe7ccd
The net/af-xdp.c code is enabled when the libxdp library is present,
however, it also has direct API calls to bpf_xdp_query_id &
bpf_xdp_detach which are provided by the libbpf library.
As a result if building with --disable-libbpf, but libxdp gets
auto-detected, we'll fail to link QEMU
/usr/bin/ld: libcommon.a.p/net_af-xdp.c.o: undefined reference to symbol 'bpf_xdp_query_id@@LIBBPF_0.7.0'
There are two bugs here
* Since we have direct libbpf API calls, when building
net/af-xdp.c, we must tell meson that libbpf is a
dependency, so that we directly link to it, rather
than relying on indirect linkage.
* We must skip probing for libxdp at all, when libbpf
is not found, raising an error if --enable-libxdp was
given explicitly.
Fixes: cb039ef3d9e3112da01e1ecd9b136ac9809ef733
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: gubin <gubin_yewu@cmss.chinamobile.com>
---
meson.build | 10 ++++++++--
net/meson.build | 2 +-
2 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/meson.build b/meson.build
index 4078f2aced..aea6a33ca3 100644
--- a/meson.build
+++ b/meson.build
@@ -1972,8 +1972,14 @@ endif
# libxdp
libxdp = not_found
if not get_option('af_xdp').auto() or have_system
- libxdp = dependency('libxdp', required: get_option('af_xdp'),
- version: '>=1.4.0', method: 'pkg-config')
+ if libbpf.found()
+ libxdp = dependency('libxdp', required: get_option('af_xdp'),
+ version: '>=1.4.0', method: 'pkg-config')
+ else
+ if get_option('af_xdp').enabled()
+ error('libxdp requested, but libbpf is not available')
+ endif
+ endif
endif
# libdw
diff --git a/net/meson.build b/net/meson.build
index ce99bd4447..7264479242 100644
--- a/net/meson.build
+++ b/net/meson.build
@@ -37,7 +37,7 @@ if have_netmap
system_ss.add(files('netmap.c'))
endif
-system_ss.add(when: libxdp, if_true: files('af-xdp.c'))
+system_ss.add(when: [libxdp, libbpf], if_true: files('af-xdp.c'))
if have_vhost_net_user
system_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-user.c'), if_false: files('vhost-user-stub.c'))
--
2.41.0.windows.1

View File

@ -0,0 +1,68 @@
From 494e0ace6c120af00b27a0cc1d4a478073654e35 Mon Sep 17 00:00:00 2001
From: Yi Liu <yi.l.liu@intel.com>
Date: Thu, 12 Sep 2024 00:33:13 -0700
Subject: [PATCH] pci: Get pasid capability from vIOMMU
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
hw/pci/pci.c | 13 +++++++++++++
include/hw/pci/pci.h | 13 +++++++++++++
2 files changed, 26 insertions(+)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index d6f627aa51..447ef2b163 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2802,6 +2802,19 @@ void pci_device_unset_iommu_device(PCIDevice *dev)
}
}
+bool pci_device_get_pasid_cap(PCIDevice *dev)
+{
+ PCIBus *iommu_bus;
+
+ pci_device_get_iommu_bus_devfn(dev, &iommu_bus, NULL, NULL);
+ if (iommu_bus && iommu_bus->iommu_ops->get_pasid_cap) {
+ return iommu_bus->iommu_ops->get_pasid_cap(pci_get_bus(dev),
+ iommu_bus->iommu_opaque,
+ dev->devfn);
+ }
+ return false;
+}
+
void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
{
/*
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 8d1af44249..0dfe274c33 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -418,12 +418,25 @@ typedef struct PCIIOMMUOps {
* @devfn: device and function number of the PCI device.
*/
void (*unset_iommu_device)(PCIBus *bus, void *opaque, int devfn);
+ /**
+ * @get_pasid_cap: get pasid capability from vIOMMU
+ *
+ * Optional callback.
+ *
+ * @bus: the #PCIBus of the PCI device.
+ *
+ * @opaque: the data passed to pci_setup_iommu().
+ *
+ * @devfn: device and function number of the PCI device.
+ */
+ bool (*get_pasid_cap)(PCIBus *bus, void *opaque, int devfn);
} PCIIOMMUOps;
AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod,
Error **errp);
void pci_device_unset_iommu_device(PCIDevice *dev);
+bool pci_device_get_pasid_cap(PCIDevice *dev);
/**
* pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus
--
2.41.0.windows.1

348
qemu.spec
View File

@ -3,7 +3,7 @@
Name: qemu Name: qemu
Version: 8.2.0 Version: 8.2.0
Release: 29 Release: 30
Epoch: 11 Epoch: 11
Summary: QEMU is a generic and open source machine emulator and virtualizer Summary: QEMU is a generic and open source machine emulator and virtualizer
License: GPLv2 and BSD and MIT and CC-BY-SA-4.0 License: GPLv2 and BSD and MIT and CC-BY-SA-4.0
@ -622,6 +622,178 @@ Patch0605: target-i386-kvm-Support-to-get-and-enable-extensions.patch
Patch0606: target-i386-csv-Request-to-set-private-memory-of-CSV.patch Patch0606: target-i386-csv-Request-to-set-private-memory-of-CSV.patch
Patch0607: target-i386-csv-Support-load-kernel-hashes-for-CSV3-.patch Patch0607: target-i386-csv-Support-load-kernel-hashes-for-CSV3-.patch
Patch0608: target-i386-csv-Support-inject-secret-for-CSV3-guest.patch Patch0608: target-i386-csv-Support-inject-secret-for-CSV3-guest.patch
Patch0609: arm-VirtCCA-CVM-support-UEFI-boot.patch
Patch0610: arm-VirtCCA-qemu-uefi-boot-support-kae.patch
Patch0611: arm-VirtCCA-Compatibility-with-older-versions-of-TMM.patch
Patch0612: arm-VirtCCA-qemu-CoDA-support-UEFI-boot.patch
Patch0613: BUGFIX-Enforce-isolation-for-virtcca_shared_hugepage.patch
Patch0614: backends-VirtCCA-cvm_gpa_start-supports-both-1GB-and.patch
Patch0615: qga-Add-log-to-guest-fsfreeze-thaw-command.patch
Patch0616: qga-Don-t-daemonize-before-channel-is-initialized.patch
Patch0617: virtcca-add-kvm-isolation-when-get-tmi-version.patch
Patch0618: backends-cryptodev-Do-not-abort-for-invalid-session-.patch
Patch0619: backends-cryptodev-Do-not-ignore-throttle-backends-E.patch
Patch0620: hw-nvme-fix-invalid-check-on-mcl.patch
Patch0621: hw-nvme-fix-invalid-endian-conversion.patch
Patch0622: net-fix-build-when-libbpf-is-disabled-but-libxdp-is-.patch
Patch0623: target-i386-Add-more-features-enumerated-by-CPUID.7..patch
Patch0624: target-i386-fix-feature-dependency-for-WAITPKG.patch
Patch0625: target-i386-add-support-for-FRED-in-CPUID-enumeratio.patch
Patch0626: target-i386-mark-CR4.FRED-not-reserved.patch
Patch0627: vmxcap-add-support-for-VMX-FRED-controls.patch
Patch0628: target-i386-enumerate-VMX-nested-exception-support.patch
Patch0629: target-i386-Add-get-set-migrate-support-for-FRED-MSR.patch
Patch0630: target-i386-Delete-duplicated-macro-definition-CR4_F.patch
Patch0631: target-i386-Add-VMX-control-bits-for-nested-FRED-sup.patch
Patch0632: target-i386-Raise-the-highest-index-value-used-for-a.patch
Patch0633: target-i386-pass-X86CPU-to-x86_cpu_get_supported_fea.patch
Patch0634: i386-cpuid-Remove-subleaf-constraint-on-CPUID-leaf-1.patch
Patch0635: target-i386-Don-t-construct-a-all-zero-entry-for-CPU.patch
Patch0636: target-i386-Enable-fdp-excptn-only-and-zero-fcs-fds.patch
Patch0637: target-i386-Construct-CPUID-2-as-stateful-iff-times-.patch
Patch0638: target-i386-Make-invtsc-migratable-when-user-sets-ts.patch
Patch0639: hw-pci-host-designware-Fix-ATU_UPPER_TARGET-register.patch
Patch0640: hw-ufs-free-irq-on-exit.patch
Patch0641: hw-sd-sdhci-free-irq-on-exit.patch
Patch0642: target-s390x-Fix-a-typo-in-s390_cpu_class_init.patch
Patch0643: hw-misc-aspeed_hace-Fix-buffer-overflow-in-has_paddi.patch
Patch0644: hw-xen-Fix-xen_bus_realize-error-handling.patch
Patch0645: cryptodev-Fix-error-handling-in-cryptodev_lkcf_execu.patch
Patch0646: vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch
Patch0647: vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch
Patch0648: vfio-container-Switch-to-dma_map-unmap-API.patch
Patch0649: vfio-common-Introduce-vfio_container_init-destroy-he.patch
Patch0650: vfio-common-Move-giommu_list-in-base-container.patch
Patch0651: vfio-container-Move-space-field-to-base-container.patch
Patch0652: vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch
Patch0653: vfio-container-Move-per-container-device-list-in-bas.patch
Patch0654: vfio-container-Convert-functions-to-base-container.patch
Patch0655: vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch
Patch0656: vfio-container-Move-vrdl_list-to-base-container.patch
Patch0657: vfio-container-Move-listener-to-base-container.patch
Patch0658: vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch
Patch0659: vfio-container-Move-iova_ranges-to-base-container.patch
Patch0660: vfio-container-Implement-attach-detach_device.patch
Patch0661: vfio-spapr-Introduce-spapr-backend-and-target-interf.patch
Patch0662: vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch
Patch0663: vfio-spapr-Move-prereg_listener-into-spapr-container.patch
Patch0664: vfio-spapr-Move-hostwin_list-into-spapr-container.patch
Patch0665: backends-iommufd-Introduce-the-iommufd-object.patch
Patch0666: util-char_dev-Add-open_cdev.patch
Patch0667: vfio-common-return-early-if-space-isn-t-empty.patch
Patch0668: vfio-iommufd-Implement-the-iommufd-backend.patch
Patch0669: vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch
Patch0670: vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch
Patch0671: vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch
Patch0672: vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch
Patch0673: vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch
Patch0674: vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch
Patch0675: vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch
Patch0676: vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch
Patch0677: vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch
Patch0678: vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch
Patch0679: vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch
Patch0680: vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch
Patch0681: vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch
Patch0682: vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch
Patch0683: hw-arm-Activate-IOMMUFD-for-virt-machines.patch
Patch0684: kconfig-Activate-IOMMUFD-for-s390x-machines.patch
Patch0685: hw-i386-Activate-IOMMUFD-for-q35-machines.patch
Patch0686: vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch
Patch0687: vfio-platform-Move-VFIODevice-initializations-in-vfi.patch
Patch0688: vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch
Patch0689: vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch
Patch0690: vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch
Patch0691: docs-devel-Add-VFIO-iommufd-backend-documentation.patch
Patch0692: vfio-container-Introduce-vfio_legacy_setup-for-furth.patch
Patch0693: vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch
Patch0694: vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch
Patch0695: vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch
Patch0696: vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch
Patch0697: vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch
Patch0698: vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch
Patch0699: vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch
Patch0700: vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch
Patch0701: vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch
Patch0702: backends-Introduce-HostIOMMUDevice-abstract.patch
Patch0703: backends-host_iommu_device-Introduce-HostIOMMUDevice.patch
Patch0704: vfio-container-Introduce-TYPE_HOST_IOMMU_DEVICE_LEGA.patch
Patch0705: backends-iommufd-Introduce-TYPE_HOST_IOMMU_DEVICE_IO.patch
Patch0706: range-Introduce-range_get_last_bit.patch
Patch0707: vfio-container-Implement-HostIOMMUDeviceClass-realiz.patch
Patch0708: backends-iommufd-Introduce-helper-function-iommufd_b.patch
Patch0709: vfio-iommufd-Implement-HostIOMMUDeviceClass-realize-.patch
Patch0710: vfio-container-Implement-HostIOMMUDeviceClass-get_ca.patch
Patch0711: backends-iommufd-Implement-HostIOMMUDeviceClass-get_.patch
Patch0712: vfio-Create-host-IOMMU-device-instance.patch
Patch0713: hw-pci-Introduce-helper-function-pci_device_get_iomm.patch
Patch0714: hw-pci-Introduce-pci_device_-set-unset-_iommu_device.patch
Patch0715: vfio-pci-Pass-HostIOMMUDevice-to-vIOMMU.patch
Patch0716: intel_iommu-Extract-out-vtd_cap_init-to-initialize-c.patch
Patch0717: intel_iommu-Implement-set-unset-_iommu_device-callba.patch
Patch0718: intel_iommu-Check-compatibility-with-host-IOMMU-capa.patch
Patch0719: vfio-pci-Extract-mdev-check-into-an-helper.patch
Patch0720: vfio-iommufd-Don-t-initialize-nor-set-a-HOST_IOMMU_D.patch
Patch0721: backends-iommufd-Extend-iommufd_backend_get_device_i.patch
Patch0722: vfio-iommufd-Return-errno-in-iommufd_cdev_attach_ioa.patch
Patch0723: vfio-ap-Don-t-initialize-HOST_IOMMU_DEVICE-with-mdev.patch
Patch0724: vfio-ccw-Don-t-initialize-HOST_IOMMU_DEVICE-with-mde.patch
Patch0725: vfio-iommufd-Introduce-auto-domain-creation.patch
Patch0726: HostIOMMUDevice-Store-the-VFIO-VDPA-agent.patch
Patch0727: vfio-iommufd-container-Remove-caps-aw_bits.patch
Patch0728: vfio-iommufd-Add-hw_caps-field-to-HostIOMMUDeviceCap.patch
Patch0729: vfio-iommufd-container-Invoke-HostIOMMUDevice-realiz.patch
Patch0730: vfio-iommufd-Probe-and-request-hwpt-dirty-tracking-c.patch
Patch0731: vfio-iommufd-Implement-VFIOIOMMUClass-set_dirty_trac.patch
Patch0732: vfio-iommufd-Implement-VFIOIOMMUClass-query_dirty_bi.patch
Patch0733: vfio-migration-Don-t-block-migration-device-dirty-tr.patch
Patch0734: vfio-common-Allow-disabling-device-dirty-page-tracki.patch
Patch0735: Update-iommufd.h-header-for-vSVA.patch
Patch0736: backends-iommufd-Add-helpers-for-invalidating-user-m.patch
Patch0737: vfio-iommufd-Add-properties-and-handlers-to-TYPE_HOS.patch
Patch0738: HostIOMMUDevice-Introduce-realize_late-callback.patch
Patch0739: vfio-iommufd-Implement-HostIOMMUDeviceClass-realize_.patch
Patch0740: vfio-iommufd-Implement-at-de-tach_hwpt-handlers.patch
Patch0741: backends-iommufd-Introduce-iommufd_backend_alloc_vio.patch
Patch0742: backends-iommufd-Introduce-iommufd_vdev_alloc.patch
Patch0743: backends-iommufd-Introduce-iommufd_viommu_invalidate.patch
Patch0744: hw-arm-smmu-common-Add-a-nested-flag-to-SMMUState.patch
Patch0745: hw-arm-smmu-common-Bypass-emulated-IOTLB-for-a-neste.patch
Patch0746: hw-arm-smmu-common-Extract-smmu_get_sbus-and-smmu_ge.patch
Patch0747: hw-arm-smmu-common-Add-set-unset_iommu_device-callba.patch
Patch0748: hw-arm-smmu-common-Add-iommufd-helpers.patch
Patch0749: hw-arm-smmu-common-Return-sysmem-if-stage-1-is-bypas.patch
Patch0750: hw-arm-smmuv3-Ignore-IOMMU_NOTIFIER_MAP-for-nested-s.patch
Patch0751: hw-arm-smmuv3-Read-host-SMMU-device-info.patch
Patch0752: hw-arm-smmuv3-Check-idr-registers-for-STE_S1CDMAX-an.patch
Patch0753: hw-arm-smmuv3-Add-smmu_dev_install_nested_ste-for-CF.patch
Patch0754: hw-arm-smmuv3-Add-missing-STE-invalidation.patch
Patch0755: hw-arm-smmu-common-Replace-smmu_iommu_mr-with-smmu_f.patch
Patch0756: hw-arm-smmuv3-Forward-cache-invalidate-commands-via-.patch
Patch0757: tests-qtest-Allow-DSDT-acpi-tables-to-change.patch
Patch0758: acpi-gpex-Fix-PCI-Express-Slot-Information-function-.patch
Patch0759: tests-data-acpi-Update-DSDT-acpi-tables.patch
Patch0760: hw-pci-host-gpex-needs-kernel-fix-Allow-to-generate-.patch
Patch0761: hw-arm-virt-Add-an-SMMU_IO_LEN-macro.patch
Patch0762: hw-arm-smmuv3-Add-initial-support-for-SMMUv3-Nested-.patch
Patch0763: hw-arm-smmuv3-Associate-a-pci-bus-with-a-SMMUv3-Nest.patch
Patch0764: hw-arm-virt-acpi-build-Build-IORT-with-multiple-SMMU.patch
Patch0765: tests-qtest-Allow-IORT-acpi-table-to-change.patch
Patch0766: hw-arm-virt-acpi-build-Add-IORT-RMR-regions-to-handl.patch
Patch0767: tests-data-acpi-virt-Update-IORT-acpi-table.patch
Patch0768: iommufd.h-Updated-to-openeuler-olk-6.6-kernel.patch
Patch0769: hw-arm-smmuv3-Enable-sva-stall-IDR-features.patch
Patch0770: kvm-Translate-MSI-doorbell-address-only-if-it-is-val.patch
Patch0771: smmuv3-Add-support-for-page-fault-handling.patch
Patch0772: pci-Get-pasid-capability-from-vIOMMU.patch
Patch0773: backend-iommufd-Report-PASID-capability.patch
Patch0774: vfio-Synthesize-vPASID-capability-to-VM.patch
Patch0775: smmuv3-realize-get_pasid_cap-and-set-ssidsize-with-p.patch
Patch0776: smmu-common-Return-sysmem-address-space-only-for-vfi.patch
Patch0777: smmuv3-Change-arm-smmuv3-nested-name-to-arm-smmuv3-a.patch
Patch0778: smmuv3-Use-default-bus-for-arm-smmuv3-accel.patch
Patch0779: gpex-acpi-Remove-duplicate-DSM-5.patch
Patch0780: Revert-linux-user-Print-tid-not-pid-with-strace.patch
BuildRequires: flex BuildRequires: flex
BuildRequires: gcc BuildRequires: gcc
@ -1220,6 +1392,180 @@ getent passwd qemu >/dev/null || \
%endif
%changelog
* Tue Apr 22 2025 Jiabo Feng <fengjiabo1@huawei.com> - 11:8.2.0-30
- Revert "linux-user: Print tid not pid with strace"
- gpex-acpi: Remove duplicate DSM #5
- smmuv3: Use default bus for arm-smmuv3-accel
- smmuv3: Change arm-smmuv3-nested name to arm-smmuv3-accel
- smmu-common: Return sysmem address space only for vfio-pci
- smmuv3: realize get_pasid_cap and set ssidsize with pasid
- vfio: Synthesize vPASID capability to VM
- backend/iommufd: Report PASID capability
- pci: Get pasid capability from vIOMMU
- smmuv3: Add support for page fault handling
- kvm: Translate MSI doorbell address only if it is valid
- hw/arm/smmuv3: Enable sva/stall IDR features
- iommufd.h: Updated to openeuler olk-6.6 kernel
- tests/data/acpi/virt: Update IORT acpi table
- hw/arm/virt-acpi-build: Add IORT RMR regions to handle MSI nested binding
- tests/qtest: Allow IORT acpi table to change
- hw/arm/virt-acpi-build: Build IORT with multiple SMMU nodes
- hw/arm/smmuv3: Associate a pci bus with a SMMUv3 Nested device
- hw/arm/smmuv3: Add initial support for SMMUv3 Nested device
- hw/arm/virt: Add an SMMU_IO_LEN macro
- hw/pci-host/gpex: [needs kernel fix] Allow to generate preserve boot config DSM #5
- tests/data/acpi: Update DSDT acpi tables
- acpi/gpex: Fix PCI Express Slot Information function 0 returned value
- tests/qtest: Allow DSDT acpi tables to change
- hw/arm/smmuv3: Forward cache invalidate commands via iommufd
- hw/arm/smmu-common: Replace smmu_iommu_mr with smmu_find_sdev
- hw/arm/smmuv3: Add missing STE invalidation
- hw/arm/smmuv3: Add smmu_dev_install_nested_ste() for CFGI_STE
- hw/arm/smmuv3: Check idr registers for STE_S1CDMAX and STE_S1STALLD
- hw/arm/smmuv3: Read host SMMU device info
- hw/arm/smmuv3: Ignore IOMMU_NOTIFIER_MAP for nested-smmuv3
- hw/arm/smmu-common: Return sysmem if stage-1 is bypassed
- hw/arm/smmu-common: Add iommufd helpers
- hw/arm/smmu-common: Add set/unset_iommu_device callback
- hw/arm/smmu-common: Extract smmu_get_sbus and smmu_get_sdev helpers
- hw/arm/smmu-common: Bypass emulated IOTLB for a nested SMMU
- hw/arm/smmu-common: Add a nested flag to SMMUState
- backends/iommufd: Introduce iommufd_viommu_invalidate_cache
- backends/iommufd: Introduce iommufd_vdev_alloc
- backends/iommufd: Introduce iommufd_backend_alloc_viommu
- vfio/iommufd: Implement [at|de]tach_hwpt handlers
- vfio/iommufd: Implement HostIOMMUDeviceClass::realize_late() handler
- HostIOMMUDevice: Introduce realize_late callback
- vfio/iommufd: Add properties and handlers to TYPE_HOST_IOMMU_DEVICE_IOMMUFD
- backends/iommufd: Add helpers for invalidating user-managed HWPT
- Update iommufd.h header for vSVA
- vfio/common: Allow disabling device dirty page tracking
- vfio/migration: Don't block migration device dirty tracking is unsupported
- vfio/iommufd: Implement VFIOIOMMUClass::query_dirty_bitmap support
- vfio/iommufd: Implement VFIOIOMMUClass::set_dirty_tracking support
- vfio/iommufd: Probe and request hwpt dirty tracking capability
- vfio/{iommufd, container}: Invoke HostIOMMUDevice::realize() during attach_device()
- vfio/iommufd: Add hw_caps field to HostIOMMUDeviceCaps
- vfio/{iommufd,container}: Remove caps::aw_bits
- HostIOMMUDevice: Store the VFIO/VDPA agent
- vfio/iommufd: Introduce auto domain creation
- vfio/ccw: Don't initialize HOST_IOMMU_DEVICE with mdev
- vfio/ap: Don't initialize HOST_IOMMU_DEVICE with mdev
- vfio/iommufd: Return errno in iommufd_cdev_attach_ioas_hwpt()
- backends/iommufd: Extend iommufd_backend_get_device_info() to fetch HW capabilities
- vfio/iommufd: Don't initialize nor set a HOST_IOMMU_DEVICE with mdev
- vfio/pci: Extract mdev check into an helper
- intel_iommu: Check compatibility with host IOMMU capabilities
- intel_iommu: Implement [set|unset]_iommu_device() callbacks
- intel_iommu: Extract out vtd_cap_init() to initialize cap/ecap
- vfio/pci: Pass HostIOMMUDevice to vIOMMU
- hw/pci: Introduce pci_device_[set|unset]_iommu_device()
- hw/pci: Introduce helper function pci_device_get_iommu_bus_devfn()
- vfio: Create host IOMMU device instance
- backends/iommufd: Implement HostIOMMUDeviceClass::get_cap() handler
- vfio/container: Implement HostIOMMUDeviceClass::get_cap() handler
- vfio/iommufd: Implement HostIOMMUDeviceClass::realize() handler
- backends/iommufd: Introduce helper function iommufd_backend_get_device_info()
- vfio/container: Implement HostIOMMUDeviceClass::realize() handler
- range: Introduce range_get_last_bit()
- backends/iommufd: Introduce TYPE_HOST_IOMMU_DEVICE_IOMMUFD[_VFIO] devices
- vfio/container: Introduce TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO device
- backends/host_iommu_device: Introduce HostIOMMUDeviceCaps
- backends: Introduce HostIOMMUDevice abstract
- vfio/iommufd: Remove CONFIG_IOMMUFD usage
- vfio/spapr: Extend VFIOIOMMUOps with a release handler
- vfio/spapr: Only compile sPAPR IOMMU support when needed
- vfio/iommufd: Introduce a VFIOIOMMU iommufd QOM interface
- vfio/spapr: Introduce a sPAPR VFIOIOMMU QOM interface
- vfio/container: Intoduce a new VFIOIOMMUClass::setup handler
- vfio/container: Introduce a VFIOIOMMU legacy QOM interface
- vfio/container: Introduce a VFIOIOMMU QOM interface
- vfio/container: Initialize VFIOIOMMUOps under vfio_init_container()
- vfio/container: Introduce vfio_legacy_setup() for further cleanups
- docs/devel: Add VFIO iommufd backend documentation
- vfio: Introduce a helper function to initialize VFIODevice
- vfio/ccw: Move VFIODevice initializations in vfio_ccw_instance_init
- vfio/ap: Move VFIODevice initializations in vfio_ap_instance_init
- vfio/platform: Move VFIODevice initializations in vfio_platform_instance_init
- vfio/pci: Move VFIODevice initializations in vfio_instance_init
- hw/i386: Activate IOMMUFD for q35 machines
- kconfig: Activate IOMMUFD for s390x machines
- hw/arm: Activate IOMMUFD for virt machines
- vfio: Make VFIOContainerBase poiner parameter const in VFIOIOMMUOps callbacks
- vfio/ccw: Make vfio cdev pre-openable by passing a file handle
- vfio/ccw: Allow the selection of a given iommu backend
- vfio/ap: Make vfio cdev pre-openable by passing a file handle
- vfio/ap: Allow the selection of a given iommu backend
- vfio/platform: Make vfio cdev pre-openable by passing a file handle
- vfio/platform: Allow the selection of a given iommu backend
- vfio/pci: Make vfio cdev pre-openable by passing a file handle
- vfio/pci: Allow the selection of a given iommu backend
- vfio/iommufd: Enable pci hot reset through iommufd cdev interface
- vfio/pci: Introduce a vfio pci hot reset interface
- vfio/pci: Extract out a helper vfio_pci_get_pci_hot_reset_info
- vfio/iommufd: Add support for iova_ranges and pgsizes
- vfio/iommufd: Relax assert check for iommufd backend
- vfio/iommufd: Implement the iommufd backend
- vfio/common: return early if space isn't empty
- util/char_dev: Add open_cdev()
- backends/iommufd: Introduce the iommufd object
- vfio/spapr: Move hostwin_list into spapr container
- vfio/spapr: Move prereg_listener into spapr container
- vfio/spapr: switch to spapr IOMMU BE add/del_section_window
- vfio/spapr: Introduce spapr backend and target interface
- vfio/container: Implement attach/detach_device
- vfio/container: Move iova_ranges to base container
- vfio/container: Move dirty_pgsizes and max_dirty_bitmap_size to base container
- vfio/container: Move listener to base container
- vfio/container: Move vrdl_list to base container
- vfio/container: Move pgsizes and dma_max_mappings to base container
- vfio/container: Convert functions to base container
- vfio/container: Move per container device list in base container
- vfio/container: Switch to IOMMU BE set_dirty_page_tracking/query_dirty_bitmap API
- vfio/container: Move space field to base container
- vfio/common: Move giommu_list in base container
- vfio/common: Introduce vfio_container_init/destroy helper
- vfio/container: Switch to dma_map|unmap API
- vfio/container: Introduce a empty VFIOIOMMUOps
- vfio: Introduce base object for VFIOContainer and targeted interface
- cryptodev: Fix error handling in cryptodev_lkcf_execute_task()
- hw/xen: Fix xen_bus_realize() error handling
- hw/misc/aspeed_hace: Fix buffer overflow in has_padding function
- target/s390x: Fix a typo in s390_cpu_class_init()
- hw/sd/sdhci: free irq on exit
- hw/ufs: free irq on exit
- hw/pci-host/designware: Fix ATU_UPPER_TARGET register access
- target/i386: Make invtsc migratable when user sets tsc-khz explicitly
- target/i386: Construct CPUID 2 as stateful iff times > 1
- target/i386: Enable fdp-excptn-only and zero-fcs-fds
- target/i386: Don't construct a all-zero entry for CPUID[0xD 0x3f]
- i386/cpuid: Remove subleaf constraint on CPUID leaf 1F
- target/i386: pass X86CPU to x86_cpu_get_supported_feature_word
- target/i386: Raise the highest index value used for any VMCS encoding
- target/i386: Add VMX control bits for nested FRED support
- target/i386: Delete duplicated macro definition CR4_FRED_MASK
- target/i386: Add get/set/migrate support for FRED MSRs
- target/i386: enumerate VMX nested-exception support
- vmxcap: add support for VMX FRED controls
- target/i386: mark CR4.FRED not reserved
- target/i386: add support for FRED in CPUID enumeration
- target/i386: fix feature dependency for WAITPKG
- target/i386: Add more features enumerated by CPUID.7.2.EDX
- net: fix build when libbpf is disabled, but libxdp is enabled
- hw/nvme: fix invalid endian conversion
- hw/nvme: fix invalid check on mcl
- backends/cryptodev: Do not ignore throttle/backends Errors
- backends/cryptodev: Do not abort for invalid session ID
- virtcca: add kvm isolation when get tmi version.
- qga: Don't daemonize before channel is initialized
- qga: Add log to guest-fsfreeze-thaw command
- backends: VirtCCA: cvm_gpa_start supports both 1GB and 3GB
- BUGFIX: Enforce isolation for virtcca_shared_hugepage
- arm: VirtCCA: qemu CoDA support UEFI boot
- arm: VirtCCA: Compatibility with older versions of TMM and the kernel
- arm: VirtCCA: qemu uefi boot support kae
- arm: VirtCCA: CVM support UEFI boot
* Fri Feb 21 2025 Jiabo Feng <fengjiabo1@huawei.com> - 11:8.2.0-29
- target/i386: csv: Support inject secret for CSV3 guest only if the extension is enabled
- target/i386: csv: Support load kernel hashes for CSV3 guest only if the extension is enabled

View File

@ -0,0 +1,48 @@
From 3a14516128cf936906e5f519bf7808b9a977a757 Mon Sep 17 00:00:00 2001
From: qihao_yewu <qihao_yewu@cmss.chinamobile.com>
Date: Fri, 7 Mar 2025 21:57:29 -0500
Subject: [PATCH] qga: Add log to guest-fsfreeze-thaw command
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
cherry-pick from ad1e6843632555c771dda6a9425930fa25b71fb3
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Message-ID: <20241216154552.213961-2-kkostiuk@redhat.com>
Signed-off-by: Konstantin Kostiuk <kkostiuk@redhat.com>
Signed-off-by: qihao_yewu <qihao_yewu@cmss.chinamobile.com>
---
qga/commands-posix.c | 1 +
qga/commands-win32.c | 3 +++
2 files changed, 4 insertions(+)
diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index 6169bbf7a0..f0d8e9e9c5 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -759,6 +759,7 @@ int64_t qmp_guest_fsfreeze_thaw(Error **errp)
ret = qmp_guest_fsfreeze_do_thaw(errp);
if (ret >= 0) {
ga_unset_frozen(ga_state);
+ slog("guest-fsthaw called");
execute_fsfreeze_hook(FSFREEZE_HOOK_THAW, errp);
} else {
ret = 0;
diff --git a/qga/commands-win32.c b/qga/commands-win32.c
index 697c65507c..656d1459f1 100644
--- a/qga/commands-win32.c
+++ b/qga/commands-win32.c
@@ -1275,6 +1275,9 @@ int64_t qmp_guest_fsfreeze_thaw(Error **errp)
qga_vss_fsfreeze(&i, false, NULL, errp);
ga_unset_frozen(ga_state);
+
+ slog("guest-fsthaw called");
+
return i;
}
--
2.41.0.windows.1

View File

@ -0,0 +1,106 @@
From 752d98d93459c87817be5e02c39257e0fa5934f8 Mon Sep 17 00:00:00 2001
From: qihao_yewu <qihao_yewu@cmss.chinamobile.com>
Date: Fri, 7 Mar 2025 21:07:11 -0500
Subject: [PATCH] qga: Don't daemonize before channel is initialized
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
cherry-pick from c6f5dd7ac8ef62dcdec4cdeda1467c658161afff
If the agent is set to daemonize but for whatever reason fails to
init the channel, the error message is lost. Worse, the agent
daemonizes needlessly and returns success. For instance:
# qemu-ga -m virtio-serial \
-p /dev/nonexistent_device \
-f /run/qemu-ga.pid \
-t /run \
-d
# echo $?
0
This makes it needlessly hard for init scripts to detect a
failure in qemu-ga startup. Though, they shouldn't pass '-d' in
the first place.
Let's open the channel first and only after that become a daemon.
Related bug: https://bugs.gentoo.org/810628
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: Ján Tomko <jtomko@redhat.com>
Reviewed-by: Konstantin Kostiuk <kkostiuk@redhat.com>
Message-ID: <7a42b0cbda5c7e01cf76bc1b29a1210cd018fa78.1736261360.git.mprivozn@redhat.com>
Signed-off-by: Konstantin Kostiuk <kkostiuk@redhat.com>
Signed-off-by: qihao_yewu <qihao_yewu@cmss.chinamobile.com>
---
qga/main.c | 24 ++++++++++++++++++------
1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/qga/main.c b/qga/main.c
index c4dcbb86be..8d341ffdf1 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -1407,7 +1407,6 @@ static GAState *initialize_agent(GAConfig *config, int socket_activation)
if (config->daemonize) {
/* delay opening/locking of pidfile till filesystems are unfrozen */
s->deferred_options.pid_filepath = config->pid_filepath;
- become_daemon(NULL);
}
if (config->log_filepath) {
/* delay opening the log file till filesystems are unfrozen */
@@ -1416,9 +1415,6 @@ static GAState *initialize_agent(GAConfig *config, int socket_activation)
ga_disable_logging(s);
qmp_for_each_command(&ga_commands, ga_disable_not_allowed_freeze, NULL);
} else {
- if (config->daemonize) {
- become_daemon(config->pid_filepath);
- }
if (config->log_filepath) {
FILE *log_file = ga_open_logfile(config->log_filepath);
if (!log_file) {
@@ -1482,6 +1478,20 @@ static GAState *initialize_agent(GAConfig *config, int socket_activation)
}
#endif
+ if (!channel_init(s, s->config->method, s->config->channel_path,
+ s->socket_activation ? FIRST_SOCKET_ACTIVATION_FD : -1)) {
+ g_critical("failed to initialize guest agent channel");
+ return NULL;
+ }
+
+ if (config->daemonize) {
+ if (ga_is_frozen(s)) {
+ become_daemon(NULL);
+ } else {
+ become_daemon(config->pid_filepath);
+ }
+ }
+
ga_state = s;
return s;
failed:
@@ -1516,8 +1526,9 @@ static void cleanup_agent(GAState *s)
static int run_agent_once(GAState *s)
{
- if (!channel_init(s, s->config->method, s->config->channel_path,
- s->socket_activation ? FIRST_SOCKET_ACTIVATION_FD : -1)) {
+    /*
+     * initialize_agent() now opens the channel itself, so only open one
+     * here when none exists. channel_init() returns false on failure, so
+     * the call must be negated just like the check it replaces.
+     */
+    if (!s->channel &&
+        !channel_init(s, s->config->method, s->config->channel_path,
+                      s->socket_activation ? FIRST_SOCKET_ACTIVATION_FD : -1)) {
g_critical("failed to initialize guest agent channel");
return EXIT_FAILURE;
}
@@ -1526,6 +1537,7 @@ static int run_agent_once(GAState *s)
if (s->channel) {
ga_channel_free(s->channel);
+ s->channel = NULL;
}
return EXIT_SUCCESS;
--
2.41.0.windows.1

View File

@ -0,0 +1,52 @@
From 30150b8727e9ec41f83c4dfcd93f04b766357469 Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
Date: Wed, 5 Jun 2024 16:30:31 +0800
Subject: [PATCH] range: Introduce range_get_last_bit()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This helper gets the highest 1 bit position of the upper bound.
If the range is empty or upper bound is zero, -1 is returned.
Suggested-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
---
include/qemu/range.h | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/include/qemu/range.h b/include/qemu/range.h
index 205e1da76d..4ce694a398 100644
--- a/include/qemu/range.h
+++ b/include/qemu/range.h
@@ -20,6 +20,8 @@
#ifndef QEMU_RANGE_H
#define QEMU_RANGE_H
+#include "qemu/bitops.h"
+
/*
* Operations on 64 bit address ranges.
* Notes:
@@ -217,6 +219,15 @@ static inline int ranges_overlap(uint64_t first1, uint64_t len1,
return !(last2 < first1 || last1 < first2);
}
+/*
+ * Get the position of the highest set bit of @range's upper bound.
+ *
+ * Returns -1 for an empty range; otherwise the result is computed as
+ * 63 - clz64(upb).
+ */
+static inline int range_get_last_bit(Range *range)
+{
+ if (range_is_empty(range)) {
+ return -1;
+ }
+ return 63 - clz64(range->upb);
+}
+
/*
* Return -1 if @a < @b, 1 @a > @b, and 0 if they touch or overlap.
* Both @a and @b must not be empty.
--
2.41.0.windows.1

View File

@ -0,0 +1,39 @@
From 58f66c2581b3c4a45a02717330f1b2188424889b Mon Sep 17 00:00:00 2001
From: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Date: Wed, 15 Jan 2025 16:11:21 +0000
Subject: [PATCH] smmu-common: Return sysmem address space only for vfio-pci
This will enable pcie-root-port hotplug event irq to work.
Discussion Link: https://lore.kernel.org/qemu-devel/74114c0db34b420a90e9fe5bd991767e@huawei.com/
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
hw/arm/smmu-common.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 3a257a5b0e..6c4b82757f 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -639,9 +639,16 @@ static AddressSpace *smmu_find_add_as(PCIBus *bus, void *opaque, int devfn)
SMMUState *s = opaque;
SMMUPciBus *sbus = smmu_get_sbus(s, bus);
SMMUDevice *sdev = smmu_get_sdev(s, sbus, bus, devfn);
+ bool is_vfio = false;
+ PCIDevice *pdev;
+
+ pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
+ if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
+ is_vfio = true;
+ }
/* Return the system as if the device uses stage-2 only */
- if (s->nested && !sdev->s1_hwpt) {
+ if (s->nested && !sdev->s1_hwpt && is_vfio) {
return &sdev->as_sysmem;
} else {
return &sdev->as;
--
2.41.0.windows.1

View File

@ -0,0 +1,462 @@
From ebfa7213e32faafd5532d6f5b3cb873018b671ae Mon Sep 17 00:00:00 2001
From: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Date: Thu, 10 Oct 2024 06:19:31 +0000
Subject: [PATCH] smmuv3: Add support for page fault handling
Handle page fault from host and send response back.
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
backends/iommufd.c | 20 +++-
hw/arm/smmu-common.c | 39 ++++++--
hw/arm/smmuv3.c | 188 ++++++++++++++++++++++++++++++++++-
hw/vfio/iommufd.c | 2 +-
include/hw/arm/smmu-common.h | 24 ++++-
include/sysemu/iommufd.h | 2 +-
6 files changed, 263 insertions(+), 12 deletions(-)
diff --git a/backends/iommufd.c b/backends/iommufd.c
index ee6f5bcf65..e9ce82297b 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -228,7 +228,7 @@ bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id,
uint32_t pt_id, uint32_t flags,
uint32_t data_type, uint32_t data_len,
void *data_ptr, uint32_t *out_hwpt,
- Error **errp)
+ uint32_t *out_fault_fd, Error **errp)
{
int ret, fd = be->fd;
struct iommu_hwpt_alloc alloc_hwpt = {
@@ -241,6 +241,24 @@ bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id,
.data_uptr = (uintptr_t)data_ptr,
};
+    /*
+     * The caller asked for a fault-capable HWPT: allocate a fault queue
+     * first, pass its id to IOMMU_HWPT_ALLOC and hand the queue's fd back
+     * through @out_fault_fd when the caller wants it.
+     */
+    if (flags & IOMMU_HWPT_FAULT_ID_VALID) {
+        struct iommu_fault_alloc cmd = {
+            .size = sizeof(cmd),
+        };
+
+        ret = ioctl(fd, IOMMU_FAULT_QUEUE_ALLOC, &cmd);
+        if (ret) {
+            /*
+             * Do not go on to request an HWPT with FAULT_ID_VALID set but
+             * no fault id to give it; report the failure to the caller.
+             */
+            error_setg_errno(errp, errno, "IOMMU_FAULT_QUEUE_ALLOC failed");
+            return false;
+        }
+
+        alloc_hwpt.fault_id = cmd.out_fault_id;
+        if (out_fault_fd) {
+            *out_fault_fd = cmd.out_fault_fd;
+        }
+    }
+
+
ret = ioctl(fd, IOMMU_HWPT_ALLOC, &alloc_hwpt);
trace_iommufd_backend_alloc_hwpt(fd, dev_id, pt_id, flags, data_type,
data_len, (uintptr_t)data_ptr,
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index d0bc620606..c382fa16e5 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -670,7 +670,7 @@ static bool smmu_dev_attach_viommu(SMMUDevice *sdev,
if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid, idev->ioas_id,
IOMMU_HWPT_ALLOC_NEST_PARENT,
IOMMU_HWPT_DATA_NONE, 0, NULL,
- &s2_hwpt_id, errp)) {
+ &s2_hwpt_id, NULL, errp)) {
error_setg(errp, "failed to allocate an S2 hwpt");
return false;
}
@@ -695,7 +695,7 @@ static bool smmu_dev_attach_viommu(SMMUDevice *sdev,
viommu->core->viommu_id, 0,
IOMMU_HWPT_DATA_ARM_SMMUV3,
sizeof(abort_data), &abort_data,
- &viommu->abort_hwpt_id, errp)) {
+ &viommu->abort_hwpt_id, NULL, errp)) {
error_setg(errp, "failed to allocate an abort pagetable");
goto free_viommu_core;
}
@@ -704,7 +704,7 @@ static bool smmu_dev_attach_viommu(SMMUDevice *sdev,
viommu->core->viommu_id, 0,
IOMMU_HWPT_DATA_ARM_SMMUV3,
sizeof(bypass_data), &bypass_data,
- &viommu->bypass_hwpt_id, errp)) {
+ &viommu->bypass_hwpt_id, NULL, errp)) {
error_setg(errp, "failed to allocate a bypass pagetable");
goto free_abort_hwpt;
}
@@ -882,6 +882,25 @@ void smmu_dev_uninstall_nested_ste(SMMUDevice *sdev, bool abort)
hwpt_id = sdev->viommu->bypass_hwpt_id;
}
+ /* ToDo: May be better to move the below to smmuv3. */
+ if (s1_hwpt->out_fault_fd) {
+ struct io_uring *ring = &s1_hwpt->fault_ring;
+ struct io_uring_sqe *sqe;
+ struct __kernel_timespec ts = {.tv_sec = 0, .tv_nsec = 1};
+
+ s1_hwpt->exiting = true;
+ /* Send out a timeout sqe for the read handler to exit */
+ sqe = io_uring_get_sqe(ring);
+ io_uring_prep_timeout(sqe, &ts, 0, 0);
+ io_uring_submit(ring);
+
+ qemu_cond_signal(&s1_hwpt->fault_cond);
+ qemu_thread_join(&s1_hwpt->read_fault_thread);
+ qemu_thread_join(&s1_hwpt->write_fault_thread);
+ qemu_mutex_destroy(&s1_hwpt->fault_mutex);
+ io_uring_queue_exit(&s1_hwpt->fault_ring);
+ }
+
if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, NULL)) {
return;
}
@@ -892,11 +911,13 @@ void smmu_dev_uninstall_nested_ste(SMMUDevice *sdev, bool abort)
}
int smmu_dev_install_nested_ste(SMMUDevice *sdev, uint32_t data_type,
- uint32_t data_len, void *data)
+ uint32_t data_len, void *data,
+ bool req_fault_fd)
{
SMMUViommu *viommu = sdev->viommu;
SMMUS1Hwpt *s1_hwpt = sdev->s1_hwpt;
HostIOMMUDeviceIOMMUFD *idev = sdev->idev;
+ uint32_t flags = 0;
if (!idev || !viommu) {
return -ENOENT;
@@ -912,12 +933,18 @@ int smmu_dev_install_nested_ste(SMMUDevice *sdev, uint32_t data_type,
}
s1_hwpt->smmu = sdev->smmu;
+ s1_hwpt->sdev = sdev;
s1_hwpt->viommu = viommu;
s1_hwpt->iommufd = idev->iommufd;
+ if (req_fault_fd) {
+ flags |= IOMMU_HWPT_FAULT_ID_VALID;
+ }
+
if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
- viommu->core->viommu_id, 0, data_type,
- data_len, data, &s1_hwpt->hwpt_id, NULL)) {
+ viommu->core->viommu_id, flags, data_type,
+ data_len, data, &s1_hwpt->hwpt_id,
+ &s1_hwpt->out_fault_fd, NULL)) {
goto free;
}
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 8d8dcccd48..30c0ae4c3b 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -34,6 +34,9 @@
#include "hw/arm/smmuv3.h"
#include "smmuv3-internal.h"
#include "smmu-internal.h"
+#ifdef CONFIG_LINUX_IO_URING
+#include <liburing.h>
+#endif
#define PTW_RECORD_FAULT(cfg) (((cfg)->stage == 1) ? (cfg)->record_faults : \
(cfg)->s2cfg.record_faults)
@@ -1258,6 +1261,165 @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd)
}
}
+/*
+ * Report a page fault read from the host fault queue to the guest as a
+ * stalled F_TRANSLATION event.
+ *
+ * A copy of @fault is also appended to @hwpt->pendfault (under
+ * @hwpt->fault_mutex) so that the resume path can later find it by its
+ * group id and reuse its cookie in the page response.
+ */
+static void smmuv3_report_iommu_fault(SMMUS1Hwpt *hwpt,
+                                      struct iommu_hwpt_pgfault *fault)
+{
+    PendFaultEntry *pend;
+    SMMUDevice *sdev = hwpt->sdev;
+    SMMUv3State *s3 = sdev->smmu;
+    uint32_t sid = smmu_get_sid(sdev);
+    SMMUEventInfo info = {0};
+
+    info.sid = sid;
+    info.type = SMMU_EVT_F_TRANSLATION;
+    info.u.f_translation.addr = fault->addr;
+    info.u.f_translation.stall = true;
+    info.u.f_translation.ssid = fault->pasid;
+    /* The fault's group id is used as the stall tag the guest resumes with */
+    info.u.f_translation.stag = fault->grpid;
+
+    /*
+     * Test the flag with '&': OR-ing the flag mask into the value is always
+     * true and would mark the substream id valid for every fault.
+     */
+    if (fault->flags & IOMMU_PGFAULT_FLAGS_PASID_VALID) {
+        info.u.f_translation.ssv = true;
+    }
+    if (fault->perm & IOMMU_PGFAULT_PERM_READ) {
+        info.u.f_translation.rnw = true;
+    }
+    if (fault->perm & IOMMU_PGFAULT_PERM_PRIV) {
+        info.u.f_translation.pnu = true;
+    }
+    if (fault->perm & IOMMU_PGFAULT_PERM_EXEC) {
+        info.u.f_translation.ind = true;
+    }
+
+    /* Queue the fault before raising the event so a resume can find it */
+    pend = g_new0(PendFaultEntry, 1);
+    memcpy(&pend->fault, fault, sizeof(*fault));
+    qemu_mutex_lock(&hwpt->fault_mutex);
+    QTAILQ_INSERT_TAIL(&hwpt->pendfault, pend, entry);
+    qemu_mutex_unlock(&hwpt->fault_mutex);
+    smmuv3_record_event(s3, &info);
+}
+
+/*
+ * Handle a guest CMD_RESUME for stream @sid: find the pending fault whose
+ * group id equals @stag, and queue a page response carrying that fault's
+ * cookie and @code for the response writer thread.
+ *
+ * A resume that matches no pending fault is dropped with a warning.
+ */
+static void smmuv3_notify_stall_resume(SMMUState *bs, uint32_t sid,
+                                       uint32_t stag, uint32_t code)
+{
+    SMMUDevice *sdev = smmu_find_sdev(bs, sid);
+    PageRespEntry *msg;
+    PendFaultEntry *pend, *tmp;
+    SMMUS1Hwpt *hwpt;
+    bool found = false;
+
+    if (!sdev) {
+        return;
+    }
+
+    hwpt = sdev->s1_hwpt;
+    /*
+     * The fault queues and their mutex are only initialised by
+     * create_fault_handlers() for an HWPT that has a fault fd; without one
+     * there is nothing to resume and nothing that may be locked.
+     */
+    if (!hwpt || !hwpt->out_fault_fd) {
+        return;
+    }
+
+    /* Kernel expects addr and pasid info for page response */
+    qemu_mutex_lock(&hwpt->fault_mutex);
+    QTAILQ_FOREACH_SAFE(pend, &hwpt->pendfault, entry, tmp) {
+        if (pend->fault.grpid == stag) {
+            QTAILQ_REMOVE(&hwpt->pendfault, pend, entry);
+            /* Allocate the response only on a match so a miss cannot leak it */
+            msg = g_new0(PageRespEntry, 1);
+            msg->resp.cookie = pend->fault.cookie;
+            msg->resp.code = code;
+            QTAILQ_INSERT_TAIL(&hwpt->pageresp, msg, entry);
+            qemu_cond_signal(&hwpt->fault_cond);
+
+            g_free(pend);
+            found = true;
+            break;
+        }
+    }
+    qemu_mutex_unlock(&hwpt->fault_mutex);
+
+    if (!found) {
+        warn_report("No matching fault for resume(stag 0x%x), drop!", stag);
+    }
+}
+
+/*
+ * Thread body: drain @hwpt->pageresp and write each queued page response
+ * to the HWPT's fault fd, until @hwpt->exiting is set.
+ *
+ * @opaque is the SMMUS1Hwpt the thread serves. Always returns NULL.
+ */
+static void *write_fault_handler(void *opaque)
+{
+    SMMUS1Hwpt *hwpt = opaque;
+    PageRespEntry *msg, *tmp;
+    struct iommu_hwpt_page_response *resp;
+    int ret;
+
+    resp = g_new0(struct iommu_hwpt_page_response, 1);
+    while (!hwpt->exiting) {
+        qemu_mutex_lock(&hwpt->fault_mutex);
+        /*
+         * Sleep only while there is nothing to send. A response that was
+         * queued and signalled while this thread was not waiting would
+         * otherwise stay in the queue until some later signal arrives.
+         * NOTE(review): the teardown path sets 'exiting' and signals without
+         * holding fault_mutex, so a wakeup can still be missed there.
+         */
+        while (!hwpt->exiting && QTAILQ_EMPTY(&hwpt->pageresp)) {
+            qemu_cond_wait(&hwpt->fault_cond, &hwpt->fault_mutex);
+        }
+        QTAILQ_FOREACH_SAFE(msg, &hwpt->pageresp, entry, tmp) {
+            QTAILQ_REMOVE(&hwpt->pageresp, msg, entry);
+            /* Copy out the payload so the queue entry can be freed at once */
+            memcpy(resp, &msg->resp, sizeof(*resp));
+            g_free(msg);
+
+            ret = write(hwpt->out_fault_fd, resp, sizeof(*resp));
+            if (ret != sizeof(*resp)) {
+                warn_report("Write resp[cookie 0x%x] fail %d",
+                            resp->cookie, ret);
+            }
+        }
+        qemu_mutex_unlock(&hwpt->fault_mutex);
+    }
+    g_free(resp);
+    return NULL;
+}
+
+/*
+ * Thread body: repeatedly read one struct iommu_hwpt_pgfault from the
+ * HWPT's fault fd through io_uring and report it to the guest, until
+ * @hwpt->exiting is set.
+ *
+ * @opaque is the SMMUS1Hwpt the thread serves. Always returns NULL.
+ */
+static void *read_fault_handler(void *opaque)
+{
+    SMMUS1Hwpt *hwpt = opaque;
+    struct io_uring_sqe *sqe;
+    struct io_uring_cqe *cqe;
+    struct iommu_hwpt_pgfault *fault;
+    struct io_uring *ring = &hwpt->fault_ring;
+    void *data;
+    int ret;
+
+    fault = g_new0(struct iommu_hwpt_pgfault, 1);
+    while (!hwpt->exiting) {
+        sqe = io_uring_get_sqe(ring);
+        io_uring_prep_read(sqe, hwpt->out_fault_fd, fault,
+                           sizeof(*fault), 0);
+        io_uring_sqe_set_data(sqe, fault);
+        io_uring_submit(ring);
+
+        cqe = NULL;
+        ret = io_uring_wait_cqe(ring, &cqe);
+        if (ret) {
+            /* The wait failed, so there is no completion to mark as seen */
+            warn_report("Read fault[hwpt_id 0x%x] failed %d",
+                        hwpt->hwpt_id, ret);
+            continue;
+        }
+
+        /* Anything other than a complete fault record is skipped */
+        if (cqe->res == sizeof(*fault)) {
+            data = io_uring_cqe_get_data(cqe);
+            smmuv3_report_iommu_fault(hwpt, data);
+        }
+        io_uring_cqe_seen(ring, cqe);
+    }
+    g_free(fault);
+    return NULL;
+}
+
+/*
+ * Set up fault handling for @hwpt: the io_uring used to read faults, the
+ * pending-fault and page-response queues with their mutex and condition
+ * variable, and the reader and writer threads.
+ *
+ * Does nothing (beyond a warning) when @hwpt has no fault fd.
+ */
+static void create_fault_handlers(SMMUS1Hwpt *hwpt)
+{
+    int ret;
+
+    if (!hwpt->out_fault_fd) {
+        warn_report("No fault fd for hwpt id: %d", hwpt->hwpt_id);
+        return;
+    }
+
+    ret = io_uring_queue_init(1024, &hwpt->fault_ring, 0);
+    if (ret < 0) {
+        /*
+         * Without a ring the reader thread cannot run. Drop the fault fd so
+         * that code keying on out_fault_fd does not try to stop threads
+         * that were never started.
+         */
+        warn_report("io_uring init for hwpt id %d failed: %d",
+                    hwpt->hwpt_id, ret);
+        close(hwpt->out_fault_fd);
+        hwpt->out_fault_fd = 0;
+        return;
+    }
+
+    qemu_mutex_init(&hwpt->fault_mutex);
+    qemu_cond_init(&hwpt->fault_cond);
+    QTAILQ_INIT(&hwpt->pageresp);
+    QTAILQ_INIT(&hwpt->pendfault);
+    qemu_thread_create(&hwpt->read_fault_thread, "io fault read",
+                       read_fault_handler,
+                       hwpt, QEMU_THREAD_JOINABLE);
+    qemu_thread_create(&hwpt->write_fault_thread, "io fault write",
+                       write_fault_handler,
+                       hwpt, QEMU_THREAD_JOINABLE);
+}
static void smmuv3_install_nested_ste(SMMUDevice *sdev, int sid)
{
#ifdef __linux__
@@ -1266,6 +1428,7 @@ static void smmuv3_install_nested_ste(SMMUDevice *sdev, int sid)
struct iommu_hwpt_arm_smmuv3 nested_data = {};
SMMUv3State *s = sdev->smmu;
SMMUState *bs = &s->smmu_state;
+ bool req_fault_fd = false;
uint32_t config;
STE ste;
int ret;
@@ -1309,13 +1472,22 @@ static void smmuv3_install_nested_ste(SMMUDevice *sdev, int sid)
/* S1DSS | S1CIR | S1COR | S1CSH | S1STALLD | EATS */
nested_data.ste[1] &= 0x380000ffULL;
+ if (STE_S1CDMAX(&ste)) {
+ req_fault_fd = true;
+ }
+
ret = smmu_dev_install_nested_ste(sdev, IOMMU_HWPT_DATA_ARM_SMMUV3,
- sizeof(nested_data), &nested_data);
+ sizeof(nested_data), &nested_data,
+ req_fault_fd);
if (ret) {
error_report("Unable to install nested STE=%16LX:%16LX, ret=%d",
nested_data.ste[1], nested_data.ste[0], ret);
}
+    /* Only start the fault threads when the nested STE install succeeded */
+    if (!ret && req_fault_fd) {
+        create_fault_handlers(sdev->s1_hwpt);
+    }
+
trace_smmuv3_install_nested_ste(sid, nested_data.ste[1], nested_data.ste[0]);
#endif
}
@@ -1631,10 +1803,22 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
case SMMU_CMD_TLBI_EL2_VA:
case SMMU_CMD_TLBI_EL2_VAA:
case SMMU_CMD_PRI_RESP:
- case SMMU_CMD_RESUME:
case SMMU_CMD_STALL_TERM:
trace_smmuv3_unhandled_cmd(type);
break;
+ case SMMU_CMD_RESUME:
+ {
+ uint32_t sid = CMD_SID(&cmd);
+ uint16_t stag = CMD_RESUME_STAG(&cmd);
+ uint8_t action = CMD_RESUME_AC(&cmd);
+ uint32_t code = IOMMUFD_PAGE_RESP_INVALID;
+
+ if (action) {
+ code = IOMMUFD_PAGE_RESP_SUCCESS;
+ }
+ smmuv3_notify_stall_resume(bs, sid, stag, code);
+ break;
+ }
default:
cmd_error = SMMU_CERROR_ILL;
break;
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 528023b95b..c0eb87c78c 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -344,7 +344,7 @@ static int iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid,
container->ioas_id, flags,
IOMMU_HWPT_DATA_NONE, 0, NULL,
- &hwpt_id, errp)) {
+ &hwpt_id, NULL, errp)) {
return -EINVAL;
}
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
index e30539a8d4..087a11efc7 100644
--- a/include/hw/arm/smmu-common.h
+++ b/include/hw/arm/smmu-common.h
@@ -138,13 +138,34 @@ typedef struct SMMUVdev {
uint32_t sid;
}SMMUVdev;
+/* A page fault read from the host, kept until the guest resumes it */
+typedef struct PendFaultEntry {
+ struct iommu_hwpt_pgfault fault;
+ QTAILQ_ENTRY(PendFaultEntry) entry;
+} PendFaultEntry;
+
+/* A page response queued for writing back to the fault fd */
+typedef struct PageRespEntry {
+ struct iommu_hwpt_page_response resp;
+ QTAILQ_ENTRY(PageRespEntry) entry;
+} PageRespEntry;
+
typedef struct SMMUS1Hwpt {
+ void *sdev;
void *smmu;
IOMMUFDBackend *iommufd;
SMMUViommu *viommu;
uint32_t hwpt_id;
+ uint32_t out_fault_fd;
QLIST_HEAD(, SMMUDevice) device_list;
QLIST_ENTRY(SMMUViommu) next;
+ /* fault handling */
+ struct io_uring fault_ring;
+ QemuThread read_fault_thread;
+ QemuThread write_fault_thread;
+ QemuMutex fault_mutex;
+ QemuCond fault_cond;
+ QTAILQ_HEAD(, PageRespEntry) pageresp;
+ QTAILQ_HEAD(, PendFaultEntry) pendfault;
+ bool exiting;
} SMMUS1Hwpt;
typedef struct SMMUDevice {
@@ -258,7 +279,8 @@ int smmu_dev_get_info(SMMUDevice *sdev, uint32_t *data_type,
uint32_t data_len, void *data);
void smmu_dev_uninstall_nested_ste(SMMUDevice *sdev, bool abort);
int smmu_dev_install_nested_ste(SMMUDevice *sdev, uint32_t data_type,
- uint32_t data_len, void *data);
+ uint32_t data_len, void *data,
+ bool req_fault_fd);
int smmu_hwpt_invalidate_cache(SMMUS1Hwpt *s1_hwpt, uint32_t type, uint32_t len,
uint32_t *num, void *reqs);
int smmu_viommu_invalidate_cache(IOMMUFDViommu *viommu, uint32_t type,
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
index 0f2c826036..b279184974 100644
--- a/include/sysemu/iommufd.h
+++ b/include/sysemu/iommufd.h
@@ -62,7 +62,7 @@ bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id,
uint32_t pt_id, uint32_t flags,
uint32_t data_type, uint32_t data_len,
void *data_ptr, uint32_t *out_hwpt,
- Error **errp);
+ uint32_t *out_fault_fd, Error **errp);
bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, uint32_t hwpt_id,
bool start, Error **errp);
bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
--
2.41.0.windows.1

View File

@ -0,0 +1,325 @@
From 2697e7418c1e0d87c82feca33800e3a093546a90 Mon Sep 17 00:00:00 2001
From: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Date: Thu, 16 Jan 2025 15:20:18 +0000
Subject: [PATCH] smmuv3: Change arm-smmuv3-nested name to arm-smmuv3-accel
This is based on feedback received for RFC v1.
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
hw/arm/smmuv3.c | 38 +++++++++++++++++++-------------------
hw/arm/virt-acpi-build.c | 16 ++++++++--------
hw/arm/virt.c | 24 ++++++++++++------------
hw/core/sysbus-fdt.c | 2 +-
include/hw/arm/smmuv3.h | 8 ++++----
include/hw/arm/virt.h | 10 +++++-----
6 files changed, 49 insertions(+), 49 deletions(-)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 6964ab000d..ecdad6bda4 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -2253,14 +2253,14 @@ static void smmu_realize(DeviceState *d, Error **errp)
smmu_init_irq(s, dev);
}
-static int smmuv3_nested_pci_host_bridge(Object *obj, void *opaque)
+static int smmuv3_accel_pci_host_bridge(Object *obj, void *opaque)
{
DeviceState *d = opaque;
- SMMUv3NestedState *s_nested = ARM_SMMUV3_NESTED(d);
+ SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(d);
if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) {
PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus;
- if (s_nested->pci_bus && !strcmp(bus->qbus.name, s_nested->pci_bus)) {
+ if (s_accel->pci_bus && !strcmp(bus->qbus.name, s_accel->pci_bus)) {
object_property_set_link(OBJECT(d), "primary-bus", OBJECT(bus),
&error_abort);
}
@@ -2268,15 +2268,15 @@ static int smmuv3_nested_pci_host_bridge(Object *obj, void *opaque)
return 0;
}
-static void smmu_nested_realize(DeviceState *d, Error **errp)
+static void smmu_accel_realize(DeviceState *d, Error **errp)
{
- SMMUv3NestedState *s_nested = ARM_SMMUV3_NESTED(d);
- SMMUv3NestedClass *c = ARM_SMMUV3_NESTED_GET_CLASS(s_nested);
+ SMMUv3AccelState *s_nested = ARM_SMMUV3_ACCEL(d);
+ SMMUv3AccelClass *c = ARM_SMMUV3_ACCEL_GET_CLASS(s_nested);
SysBusDevice *dev = SYS_BUS_DEVICE(d);
Error *local_err = NULL;
object_child_foreach_recursive(object_get_root(),
- smmuv3_nested_pci_host_bridge, d);
+ smmuv3_accel_pci_host_bridge, d);
object_property_set_bool(OBJECT(dev), "nested", true, &error_abort);
c->parent_realize(d, &local_err);
@@ -2365,8 +2365,8 @@ static Property smmuv3_properties[] = {
DEFINE_PROP_END_OF_LIST()
};
-static Property smmuv3_nested_properties[] = {
- DEFINE_PROP_STRING("pci-bus", SMMUv3NestedState, pci_bus),
+static Property smmuv3_accel_properties[] = {
+ DEFINE_PROP_STRING("pci-bus", SMMUv3AccelState, pci_bus),
DEFINE_PROP_END_OF_LIST()
};
@@ -2389,15 +2389,15 @@ static void smmuv3_class_init(ObjectClass *klass, void *data)
device_class_set_props(dc, smmuv3_properties);
}
-static void smmuv3_nested_class_init(ObjectClass *klass, void *data)
+static void smmuv3_accel_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
- SMMUv3NestedClass *c = ARM_SMMUV3_NESTED_CLASS(klass);
+ SMMUv3AccelClass *c = ARM_SMMUV3_ACCEL_CLASS(klass);
dc->vmsd = &vmstate_smmuv3;
- device_class_set_parent_realize(dc, smmu_nested_realize,
+ device_class_set_parent_realize(dc, smmu_accel_realize,
&c->parent_realize);
- device_class_set_props(dc, smmuv3_nested_properties);
+ device_class_set_props(dc, smmuv3_accel_properties);
dc->user_creatable = true;
dc->hotpluggable = false;
}
@@ -2440,12 +2440,12 @@ static void smmuv3_iommu_memory_region_class_init(ObjectClass *klass,
imrc->notify_flag_changed = smmuv3_notify_flag_changed;
}
-static const TypeInfo smmuv3_nested_type_info = {
- .name = TYPE_ARM_SMMUV3_NESTED,
+static const TypeInfo smmuv3_accel_type_info = {
+ .name = TYPE_ARM_SMMUV3_ACCEL,
.parent = TYPE_ARM_SMMUV3,
- .instance_size = sizeof(SMMUv3NestedState),
- .class_size = sizeof(SMMUv3NestedClass),
- .class_init = smmuv3_nested_class_init,
+ .instance_size = sizeof(SMMUv3AccelState),
+ .class_size = sizeof(SMMUv3AccelClass),
+ .class_init = smmuv3_accel_class_init,
};
static const TypeInfo smmuv3_type_info = {
@@ -2466,7 +2466,7 @@ static const TypeInfo smmuv3_iommu_memory_region_info = {
static void smmuv3_register_types(void)
{
type_register(&smmuv3_type_info);
- type_register(&smmuv3_nested_type_info);
+ type_register(&smmuv3_accel_type_info);
type_register(&smmuv3_iommu_memory_region_info);
}
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index ad0f79e03d..db635120f9 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -418,10 +418,10 @@ static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap,
};
/*
- * Nested SMMU requires RMRs for MSI 1-1 mapping, which
+ * Accel SMMU requires RMRs for MSI 1-1 mapping, which
* require _DSM for PreservingPCI Boot Configurations
*/
- if (vms->iommu == VIRT_IOMMU_SMMUV3_NESTED) {
+ if (vms->iommu == VIRT_IOMMU_SMMUV3_ACCEL) {
cfg.preserve_config = true;
}
@@ -619,10 +619,10 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
/* Table 2 The IORT */
acpi_table_begin(&table, table_data);
- if (vms->smmu_nested_count) {
- irq = vms->irqmap[VIRT_SMMU_NESTED] + ARM_SPI_BASE;
- base = vms->memmap[VIRT_SMMU_NESTED].base;
- num_smmus = vms->smmu_nested_count;
+ if (vms->smmu_accel_count) {
+ irq = vms->irqmap[VIRT_SMMU_ACCEL] + ARM_SPI_BASE;
+ base = vms->memmap[VIRT_SMMU_ACCEL].base;
+ num_smmus = vms->smmu_accel_count;
} else if (virt_has_smmuv3(vms)) {
irq = vms->irqmap[VIRT_SMMU] + ARM_SPI_BASE;
base = vms->memmap[VIRT_SMMU].base;
@@ -655,7 +655,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
}
next_range.input_base = idmap->input_base + idmap->id_count;
- if (vms->iommu == VIRT_IOMMU_SMMUV3_NESTED) {
+ if (vms->iommu == VIRT_IOMMU_SMMUV3_ACCEL) {
nb_nodes++; /* RMR node per SMMU */
}
}
@@ -775,7 +775,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET, 0);
}
- if (vms->iommu == VIRT_IOMMU_SMMUV3_NESTED) {
+ if (vms->iommu == VIRT_IOMMU_SMMUV3_ACCEL) {
build_iort_rmr_nodes(table_data, smmu_idmaps, smmu_offset, &id);
}
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index a55f297af2..57d00acd48 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -166,7 +166,7 @@ static const MemMapEntry base_memmap[] = {
/* In the virtCCA scenario, this space is used for MSI interrupt mapping */
[VIRT_CVM_MSI] = { 0x0a001000, 0x00fff000 },
[VIRT_CPUFREQ] = { 0x0b000000, 0x00010000 },
- [VIRT_SMMU_NESTED] = { 0x0b010000, 0x00ff0000},
+ [VIRT_SMMU_ACCEL] = { 0x0b010000, 0x00ff0000},
/* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
[VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 },
[VIRT_SECURE_MEM] = { 0x0e000000, 0x01000000 },
@@ -212,7 +212,7 @@ static const int a15irqmap[] = {
[VIRT_GIC_V2M] = 48, /* ...to 48 + NUM_GICV2M_SPIS - 1 */
[VIRT_SMMU] = 74, /* ...to 74 + NUM_SMMU_IRQS - 1 */
[VIRT_PLATFORM_BUS] = 112, /* ...to 112 + PLATFORM_BUS_NUM_IRQS -1 */
- [VIRT_SMMU_NESTED] = 200,
+ [VIRT_SMMU_ACCEL] = 200,
};
static const char *valid_cpus[] = {
@@ -3619,27 +3619,27 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
/* For smmuv3-nested devices we need to set the mem & irq */
if (device_is_dynamic_sysbus(mc, dev) &&
- object_dynamic_cast(OBJECT(dev), TYPE_ARM_SMMUV3_NESTED)) {
- hwaddr base = vms->memmap[VIRT_SMMU_NESTED].base;
- int irq = vms->irqmap[VIRT_SMMU_NESTED];
+ object_dynamic_cast(OBJECT(dev), TYPE_ARM_SMMUV3_ACCEL)) {
+ hwaddr base = vms->memmap[VIRT_SMMU_ACCEL].base;
+ int irq = vms->irqmap[VIRT_SMMU_ACCEL];
- if (vms->smmu_nested_count >= MAX_SMMU_NESTED) {
+ if (vms->smmu_accel_count >= MAX_SMMU_ACCEL) {
error_setg(errp, "smmuv3-nested max count reached!");
return;
}
- base += (vms->smmu_nested_count * SMMU_IO_LEN);
- irq += (vms->smmu_nested_count * NUM_SMMU_IRQS);
+ base += (vms->smmu_accel_count * SMMU_IO_LEN);
+ irq += (vms->smmu_accel_count * NUM_SMMU_IRQS);
sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base);
for (int i = 0; i < 4; i++) {
sysbus_connect_irq(SYS_BUS_DEVICE(dev), i,
qdev_get_gpio_in(vms->gic, irq + i));
}
- if (vms->iommu != VIRT_IOMMU_SMMUV3_NESTED) {
- vms->iommu = VIRT_IOMMU_SMMUV3_NESTED;
+ if (vms->iommu != VIRT_IOMMU_SMMUV3_ACCEL) {
+ vms->iommu = VIRT_IOMMU_SMMUV3_ACCEL;
}
- vms->smmu_nested_count++;
+ vms->smmu_accel_count++;
}
if (vms->platform_bus_dev) {
@@ -3815,7 +3815,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE);
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_PLATFORM);
- machine_class_allow_dynamic_sysbus_dev(mc, TYPE_ARM_SMMUV3_NESTED);
+ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_ARM_SMMUV3_ACCEL);
#ifdef CONFIG_TPM
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS);
#endif
diff --git a/hw/core/sysbus-fdt.c b/hw/core/sysbus-fdt.c
index 0f0d0b3e58..58f4dc614c 100644
--- a/hw/core/sysbus-fdt.c
+++ b/hw/core/sysbus-fdt.c
@@ -489,7 +489,7 @@ static const BindingEntry bindings[] = {
#ifdef CONFIG_LINUX
TYPE_BINDING(TYPE_VFIO_CALXEDA_XGMAC, add_calxeda_midway_xgmac_fdt_node),
TYPE_BINDING(TYPE_VFIO_AMD_XGBE, add_amd_xgbe_fdt_node),
- TYPE_BINDING("arm-smmuv3-nested", no_fdt_node),
+ TYPE_BINDING("arm-smmuv3-accel", no_fdt_node),
VFIO_PLATFORM_BINDING("amd,xgbe-seattle-v1a", add_amd_xgbe_fdt_node),
#endif
#ifdef CONFIG_TPM
diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h
index 96513fce56..79b6fcd8e7 100644
--- a/include/hw/arm/smmuv3.h
+++ b/include/hw/arm/smmuv3.h
@@ -84,16 +84,16 @@ struct SMMUv3Class {
#define TYPE_ARM_SMMUV3 "arm-smmuv3"
OBJECT_DECLARE_TYPE(SMMUv3State, SMMUv3Class, ARM_SMMUV3)
-#define TYPE_ARM_SMMUV3_NESTED "arm-smmuv3-nested"
-OBJECT_DECLARE_TYPE(SMMUv3NestedState, SMMUv3NestedClass, ARM_SMMUV3_NESTED)
+#define TYPE_ARM_SMMUV3_ACCEL "arm-smmuv3-accel"
+OBJECT_DECLARE_TYPE(SMMUv3AccelState, SMMUv3AccelClass, ARM_SMMUV3_ACCEL)
-struct SMMUv3NestedState {
+struct SMMUv3AccelState {
SMMUv3State smmuv3_state;
char *pci_bus;
};
-struct SMMUv3NestedClass {
+struct SMMUv3AccelClass {
/*< private >*/
SMMUv3Class smmuv3_class;
/*< public >*/
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index bc3c8b70da..3e2759d225 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -110,7 +110,7 @@ typedef enum {
#define SMMU_IO_LEN 0x20000
/* Max supported nested SMMUv3 */
-#define MAX_SMMU_NESTED 64
+#define MAX_SMMU_ACCEL 64
enum {
VIRT_FLASH,
@@ -124,7 +124,7 @@ enum {
VIRT_GIC_ITS,
VIRT_GIC_REDIST,
VIRT_SMMU,
- VIRT_SMMU_NESTED,
+ VIRT_SMMU_ACCEL,
VIRT_UART,
VIRT_CPUFREQ,
VIRT_MMIO,
@@ -159,7 +159,7 @@ enum {
typedef enum VirtIOMMUType {
VIRT_IOMMU_NONE,
VIRT_IOMMU_SMMUV3,
- VIRT_IOMMU_SMMUV3_NESTED,
+ VIRT_IOMMU_SMMUV3_ACCEL,
VIRT_IOMMU_VIRTIO,
} VirtIOMMUType;
@@ -227,7 +227,7 @@ struct VirtMachineState {
bool mte;
bool dtb_randomness;
bool pmu;
- int smmu_nested_count;
+ int smmu_accel_count;
OnOffAuto acpi;
VirtGICType gic_version;
VirtIOMMUType iommu;
@@ -298,7 +298,7 @@ static inline int virt_gicv3_redist_region_count(VirtMachineState *vms)
static inline bool virt_has_smmuv3(const VirtMachineState *vms)
{
return vms->iommu == VIRT_IOMMU_SMMUV3 ||
- vms->iommu == VIRT_IOMMU_SMMUV3_NESTED;
+ vms->iommu == VIRT_IOMMU_SMMUV3_ACCEL;
}
#endif /* QEMU_ARM_VIRT_H */
--
2.41.0.windows.1

View File

@ -0,0 +1,55 @@
From 5e83bdd94533c91d69c7154d967f3bdd2fa86054 Mon Sep 17 00:00:00 2001
From: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Date: Thu, 16 Jan 2025 15:29:49 +0000
Subject: [PATCH] smmuv3: Use default bus for arm-smmuv3-accel
This is based on feedback on RFC v1.
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
hw/arm/smmuv3.c | 10 ++--------
1 file changed, 2 insertions(+), 8 deletions(-)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index ecdad6bda4..c0fcdd7574 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -2256,11 +2256,10 @@ static void smmu_realize(DeviceState *d, Error **errp)
static int smmuv3_accel_pci_host_bridge(Object *obj, void *opaque)
{
DeviceState *d = opaque;
- SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(d);
if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) {
PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus;
- if (s_accel->pci_bus && !strcmp(bus->qbus.name, s_accel->pci_bus)) {
+ if (d->parent_bus && !strcmp(bus->qbus.name, d->parent_bus->name)) {
object_property_set_link(OBJECT(d), "primary-bus", OBJECT(bus),
&error_abort);
}
@@ -2365,11 +2364,6 @@ static Property smmuv3_properties[] = {
DEFINE_PROP_END_OF_LIST()
};
-static Property smmuv3_accel_properties[] = {
- DEFINE_PROP_STRING("pci-bus", SMMUv3AccelState, pci_bus),
- DEFINE_PROP_END_OF_LIST()
-};
-
static void smmuv3_instance_init(Object *obj)
{
/* Nothing much to do here as of now */
@@ -2397,9 +2391,9 @@ static void smmuv3_accel_class_init(ObjectClass *klass, void *data)
dc->vmsd = &vmstate_smmuv3;
device_class_set_parent_realize(dc, smmu_accel_realize,
&c->parent_realize);
- device_class_set_props(dc, smmuv3_accel_properties);
dc->user_creatable = true;
dc->hotpluggable = false;
+ dc->bus_type = TYPE_PCIE_BUS;
}
static int smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu,
--
2.41.0.windows.1

View File

@ -0,0 +1,52 @@
From d4d0d15716a3f4c89ca9532e6b598b14db76ae0c Mon Sep 17 00:00:00 2001
From: Zhangfei Gao <zhangfei.gao@linaro.org>
Date: Sat, 26 Oct 2024 08:40:11 +0000
Subject: [PATCH] smmuv3: realize get_pasid_cap and set ssidsize with pasid
Signed-off-by: Zhangfei Gao <zhangfei.gao@linaro.org>
---
hw/arm/smmu-common.c | 9 +++++++++
hw/arm/smmuv3.c | 3 +--
2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index e7028bd4ec..3a257a5b0e 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -831,10 +831,19 @@ static void smmu_dev_unset_iommu_device(PCIBus *bus, void *opaque, int devfn)
}
}
+static bool smmu_dev_get_pasid_cap(PCIBus *bus,
+ void *opaque, int devfn)
+{
+ assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
+
+ return true;
+}
+
static const PCIIOMMUOps smmu_ops = {
.get_address_space = smmu_find_add_as,
.set_iommu_device = smmu_dev_set_iommu_device,
.unset_iommu_device = smmu_dev_unset_iommu_device,
+ .get_pasid_cap = smmu_dev_get_pasid_cap,
};
SMMUDevice *smmu_find_sdev(SMMUState *s, uint32_t sid)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 0ca0e96fcc..6964ab000d 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -312,8 +312,7 @@ out:
val = FIELD_EX32(sdev->info.idr[1], IDR1, SIDSIZE);
s->idr[1] = FIELD_DP32(s->idr[1], IDR1, SIDSIZE, val);
- val = FIELD_EX32(sdev->info.idr[1], IDR1, SSIDSIZE);
- s->idr[1] = FIELD_DP32(s->idr[1], IDR1, SSIDSIZE, val);
+ s->idr[1] = FIELD_DP32(s->idr[1], IDR1, SSIDSIZE, pasid);
val = FIELD_EX32(sdev->info.idr[3], IDR3, HAD);
s->idr[3] = FIELD_DP32(s->idr[3], IDR3, HAD, val);
--
2.41.0.windows.1

View File

@ -0,0 +1,48 @@
From 4dea92e8570650776ed8caa0fedf0a90920f5e97 Mon Sep 17 00:00:00 2001
From: "Xin Li (Intel)" <xin@zytor.com>
Date: Wed, 7 Aug 2024 01:18:11 -0700
Subject: [PATCH] target/i386: Add VMX control bits for nested FRED support
commit 7c6ec5bc5fea92a4ddea3f0189e3a7e7588e1d19 upstream.
Add definitions of
1) VM-exit activate secondary controls bit
2) VM-entry load FRED bit
which are required to enable nested FRED.
Intel-SIG: commit 7c6ec5bc5fea target/i386: Add VMX control bits for nested FRED support
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Signed-off-by: Xin Li (Intel) <xin@zytor.com>
Link: https://lore.kernel.org/r/20240807081813.735158-3-xin@zytor.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
---
target/i386/cpu.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 00e636e61c..f80570f4da 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1271,7 +1271,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
"vmx-exit-save-efer", "vmx-exit-load-efer",
"vmx-exit-save-preemption-timer", "vmx-exit-clear-bndcfgs",
NULL, "vmx-exit-clear-rtit-ctl", NULL, NULL,
- NULL, "vmx-exit-load-pkrs", NULL, NULL,
+ NULL, "vmx-exit-load-pkrs", NULL, "vmx-exit-secondary-ctls",
},
.msr = {
.index = MSR_IA32_VMX_TRUE_EXIT_CTLS,
@@ -1286,7 +1286,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
NULL, "vmx-entry-ia32e-mode", NULL, NULL,
NULL, "vmx-entry-load-perf-global-ctrl", "vmx-entry-load-pat", "vmx-entry-load-efer",
"vmx-entry-load-bndcfgs", NULL, "vmx-entry-load-rtit-ctl", NULL,
- NULL, NULL, "vmx-entry-load-pkrs", NULL,
+ NULL, NULL, "vmx-entry-load-pkrs", "vmx-entry-load-fred",
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
},
--
2.41.0.windows.1

View File

@ -0,0 +1,188 @@
From c3e47749fba4418d80bf4314335118452912b29c Mon Sep 17 00:00:00 2001
From: Xin Li <xin3.li@intel.com>
Date: Wed, 8 Nov 2023 23:20:12 -0800
Subject: [PATCH] target/i386: Add get/set/migrate support for FRED MSRs
commit 4ebd98eb3ade5957a842da1420bda012eeeaab9c upstream.
FRED CPU states are managed in 9 new FRED MSRs, in addition to a few
existing CPU registers and MSRs, e.g., CR4.FRED and MSR_IA32_PL0_SSP.
Save/restore/migrate FRED MSRs if FRED is exposed to the guest.
Intel-SIG: commit 4ebd98eb3ade target/i386: Add get/set/migrate support for FRED MSRs
Tested-by: Shan Kang <shan.kang@intel.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
Message-ID: <20231109072012.8078-7-xin3.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
---
target/i386/cpu.h | 22 +++++++++++++++++++
target/i386/kvm/kvm.c | 49 +++++++++++++++++++++++++++++++++++++++++++
target/i386/machine.c | 28 +++++++++++++++++++++++++
3 files changed, 99 insertions(+)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index b03237c305..1b9d922651 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -539,6 +539,17 @@ typedef enum X86Seg {
#define MSR_IA32_XFD 0x000001c4
#define MSR_IA32_XFD_ERR 0x000001c5
+/* FRED MSRs */
+#define MSR_IA32_FRED_RSP0 0x000001cc /* Stack level 0 regular stack pointer */
+#define MSR_IA32_FRED_RSP1 0x000001cd /* Stack level 1 regular stack pointer */
+#define MSR_IA32_FRED_RSP2 0x000001ce /* Stack level 2 regular stack pointer */
+#define MSR_IA32_FRED_RSP3 0x000001cf /* Stack level 3 regular stack pointer */
+#define MSR_IA32_FRED_STKLVLS 0x000001d0 /* FRED exception stack levels */
+#define MSR_IA32_FRED_SSP1 0x000001d1 /* Stack level 1 shadow stack pointer in ring 0 */
+#define MSR_IA32_FRED_SSP2 0x000001d2 /* Stack level 2 shadow stack pointer in ring 0 */
+#define MSR_IA32_FRED_SSP3 0x000001d3 /* Stack level 3 shadow stack pointer in ring 0 */
+#define MSR_IA32_FRED_CONFIG 0x000001d4 /* FRED Entrypoint and interrupt stack level */
+
#define MSR_IA32_BNDCFGS 0x00000d90
#define MSR_IA32_XSS 0x00000da0
#define MSR_IA32_UMWAIT_CONTROL 0xe1
@@ -1698,6 +1709,17 @@ typedef struct CPUArchState {
target_ulong cstar;
target_ulong fmask;
target_ulong kernelgsbase;
+
+ /* FRED MSRs */
+ uint64_t fred_rsp0;
+ uint64_t fred_rsp1;
+ uint64_t fred_rsp2;
+ uint64_t fred_rsp3;
+ uint64_t fred_stklvls;
+ uint64_t fred_ssp1;
+ uint64_t fred_ssp2;
+ uint64_t fred_ssp3;
+ uint64_t fred_config;
#endif
uint64_t tsc_adjust;
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 12e920bbb4..5f3497e122 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -3391,6 +3391,17 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, env->kernelgsbase);
kvm_msr_entry_add(cpu, MSR_FMASK, env->fmask);
kvm_msr_entry_add(cpu, MSR_LSTAR, env->lstar);
+ if (env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED) {
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP0, env->fred_rsp0);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP1, env->fred_rsp1);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP2, env->fred_rsp2);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP3, env->fred_rsp3);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_STKLVLS, env->fred_stklvls);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP1, env->fred_ssp1);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP2, env->fred_ssp2);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP3, env->fred_ssp3);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_CONFIG, env->fred_config);
+ }
}
#endif
@@ -3867,6 +3878,17 @@ static int kvm_get_msrs(X86CPU *cpu)
kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, 0);
kvm_msr_entry_add(cpu, MSR_FMASK, 0);
kvm_msr_entry_add(cpu, MSR_LSTAR, 0);
+ if (env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED) {
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP0, 0);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP1, 0);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP2, 0);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP3, 0);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_STKLVLS, 0);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP1, 0);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP2, 0);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP3, 0);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_CONFIG, 0);
+ }
}
#endif
kvm_msr_entry_add(cpu, MSR_KVM_SYSTEM_TIME, 0);
@@ -4092,6 +4114,33 @@ static int kvm_get_msrs(X86CPU *cpu)
case MSR_LSTAR:
env->lstar = msrs[i].data;
break;
+ case MSR_IA32_FRED_RSP0:
+ env->fred_rsp0 = msrs[i].data;
+ break;
+ case MSR_IA32_FRED_RSP1:
+ env->fred_rsp1 = msrs[i].data;
+ break;
+ case MSR_IA32_FRED_RSP2:
+ env->fred_rsp2 = msrs[i].data;
+ break;
+ case MSR_IA32_FRED_RSP3:
+ env->fred_rsp3 = msrs[i].data;
+ break;
+ case MSR_IA32_FRED_STKLVLS:
+ env->fred_stklvls = msrs[i].data;
+ break;
+ case MSR_IA32_FRED_SSP1:
+ env->fred_ssp1 = msrs[i].data;
+ break;
+ case MSR_IA32_FRED_SSP2:
+ env->fred_ssp2 = msrs[i].data;
+ break;
+ case MSR_IA32_FRED_SSP3:
+ env->fred_ssp3 = msrs[i].data;
+ break;
+ case MSR_IA32_FRED_CONFIG:
+ env->fred_config = msrs[i].data;
+ break;
#endif
case MSR_IA32_TSC:
env->tsc = msrs[i].data;
diff --git a/target/i386/machine.c b/target/i386/machine.c
index 9a1cb8f3b8..7cbfbc0efb 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -1544,6 +1544,33 @@ static const VMStateDescription vmstate_msr_xfd = {
};
#ifdef TARGET_X86_64
+static bool intel_fred_msrs_needed(void *opaque)
+{
+ X86CPU *cpu = opaque;
+ CPUX86State *env = &cpu->env;
+
+ return !!(env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED);
+}
+
+static const VMStateDescription vmstate_msr_fred = {
+ .name = "cpu/fred",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = intel_fred_msrs_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(env.fred_rsp0, X86CPU),
+ VMSTATE_UINT64(env.fred_rsp1, X86CPU),
+ VMSTATE_UINT64(env.fred_rsp2, X86CPU),
+ VMSTATE_UINT64(env.fred_rsp3, X86CPU),
+ VMSTATE_UINT64(env.fred_stklvls, X86CPU),
+ VMSTATE_UINT64(env.fred_ssp1, X86CPU),
+ VMSTATE_UINT64(env.fred_ssp2, X86CPU),
+ VMSTATE_UINT64(env.fred_ssp3, X86CPU),
+ VMSTATE_UINT64(env.fred_config, X86CPU),
+ VMSTATE_END_OF_LIST()
+ }
+ };
+
static bool amx_xtile_needed(void *opaque)
{
X86CPU *cpu = opaque;
@@ -1768,6 +1795,7 @@ const VMStateDescription vmstate_x86_cpu = {
&vmstate_pdptrs,
&vmstate_msr_xfd,
#ifdef TARGET_X86_64
+ &vmstate_msr_fred,
&vmstate_amx_xtile,
#endif
&vmstate_arch_lbr,
--
2.41.0.windows.1

View File

@ -0,0 +1,63 @@
From cfb01b2fe4a99ed030dacdc49064a152a472dc2d Mon Sep 17 00:00:00 2001
From: Chao Gao <chao.gao@intel.com>
Date: Thu, 19 Sep 2024 13:10:11 +0800
Subject: [PATCH] target/i386: Add more features enumerated by CPUID.7.2.EDX
commit 10eaf9c0fb7060f45807becbb2742a9de9bc3632 upstream
The following 5 bits in CPUID.7.2.EDX are supported by KVM. Add support
for them in QEMU. Each of them indicates certain bits of IA32_SPEC_CTRL
are supported. Those bits can control CPU speculation behavior which can
be used to defend against side-channel attacks.
bit0: intel-psfd
if 1, indicates bit 7 of the IA32_SPEC_CTRL MSR is supported. Bit 7 of
this MSR disables Fast Store Forwarding Predictor without disabling
Speculative Store Bypass
bit1: ipred-ctrl
If 1, indicates bits 3 and 4 of the IA32_SPEC_CTRL MSR are supported.
Bit 3 of this MSR enables IPRED_DIS control for CPL3. Bit 4 of this
MSR enables IPRED_DIS control for CPL0/1/2
bit2: rrsba-ctrl
If 1, indicates bits 5 and 6 of the IA32_SPEC_CTRL MSR are supported.
Bit 5 of this MSR disables RRSBA behavior for CPL3. Bit 6 of this MSR
disables RRSBA behavior for CPL0/1/2
bit3: ddpd-u
If 1, indicates bit 8 of the IA32_SPEC_CTRL MSR is supported. Bit 8 of
this MSR disables Data Dependent Prefetcher.
bit4: bhi-ctrl
if 1, indicates bit 10 of the IA32_SPEC_CTRL MSR is supported. Bit 10
of this MSR enables BHI_DIS_S behavior.
Intel-SIG: 10eaf9c0fb70 target/i386: Add more features enumerated by CPUID.7.2.EDX
Signed-off-by: Chao Gao <chao.gao@intel.com>
Link: https://lore.kernel.org/r/20240919051011.118309-1-chao.gao@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
---
target/i386/cpu.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 1fa08265bc..f3df62127c 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1000,8 +1000,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
[FEAT_7_2_EDX] = {
.type = CPUID_FEATURE_WORD,
.feat_names = {
- NULL, NULL, NULL, NULL,
- NULL, "mcdt-no", NULL, NULL,
+ "intel-psfd", "ipred-ctrl", "rrsba-ctrl", "ddpd-u",
+ "bhi-ctrl", "mcdt-no", NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
--
2.41.0.windows.1

View File

@ -0,0 +1,41 @@
From afcdb893e4c702f4e009a98da71408cf54a53cc4 Mon Sep 17 00:00:00 2001
From: Xiaoyao Li <xiaoyao.li@intel.com>
Date: Wed, 14 Aug 2024 03:54:27 -0400
Subject: [PATCH] target/i386: Construct CPUID 2 as stateful iff times > 1
commit 5ab639141b6d916a6f4041d4ec46f2f1a1e4a365 upstream.
When times == 1, the CPUID leaf 2 is not stateful.
Intel-SIG: commit 5ab639141b6d target/i386: Construct CPUID 2 as stateful iff times > 1
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20240814075431.339209-6-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
---
target/i386/kvm/kvm.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 5057dfbd75..a867512822 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -1896,10 +1896,12 @@ int kvm_arch_init_vcpu(CPUState *cs)
int times;
c->function = i;
- c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC |
- KVM_CPUID_FLAG_STATE_READ_NEXT;
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
times = c->eax & 0xff;
+ if (times > 1) {
+ c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC |
+ KVM_CPUID_FLAG_STATE_READ_NEXT;
+ }
for (j = 1; j < times; ++j) {
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
--
2.41.0.windows.1

View File

@ -0,0 +1,39 @@
From 1eacc509e9158b9e87f05fc9844142c0022b2d64 Mon Sep 17 00:00:00 2001
From: "Xin Li (Intel)" <xin@zytor.com>
Date: Wed, 7 Aug 2024 01:18:10 -0700
Subject: [PATCH] target/i386: Delete duplicated macro definition CR4_FRED_MASK
commit a23bc6539890d8b27458cf56bc4ed0e0d3c2de3e upstream.
Macro CR4_FRED_MASK is defined twice, delete one.
Intel-SIG: commit a23bc6539890 target/i386: Delete duplicated macro definition CR4_FRED_MASK
Signed-off-by: Xin Li (Intel) <xin@zytor.com>
Link: https://lore.kernel.org/r/20240807081813.735158-2-xin@zytor.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
---
target/i386/cpu.h | 6 ------
1 file changed, 6 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 1b9d922651..f022749c86 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -270,12 +270,6 @@ typedef enum X86Seg {
#define CR4_FRED_MASK 0
#endif
-#ifdef TARGET_X86_64
-#define CR4_FRED_MASK (1ULL << 32)
-#else
-#define CR4_FRED_MASK 0
-#endif
-
#define CR4_RESERVED_MASK \
(~(target_ulong)(CR4_VME_MASK | CR4_PVI_MASK | CR4_TSD_MASK \
| CR4_DE_MASK | CR4_PSE_MASK | CR4_PAE_MASK \
--
2.41.0.windows.1

View File

@ -0,0 +1,57 @@
From e0b51ea0f229ea9c6788fa0da252e8100e30241e Mon Sep 17 00:00:00 2001
From: Xiaoyao Li <xiaoyao.li@intel.com>
Date: Wed, 14 Aug 2024 03:54:23 -0400
Subject: [PATCH] target/i386: Don't construct a all-zero entry for CPUID[0xD
0x3f]
commit 00c8a933d95add3ce4afebbe491ca0fa398a9007 upstream.
Currently, QEMU always constructs an all-zero CPUID entry for
CPUID[0xD 0x3f].
It's meaningless to construct such a leaf as the end of leaf 0xD. Rework
the logic of how subleaves of 0xD are constructed to get rid of such
all-zero value of subleaf 0x3f.
Intel-SIG: commit 00c8a933d95a target/i386: Don't construct a all-zero entry for CPUID[0xD 0x3f]
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20240814075431.339209-2-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
---
target/i386/kvm/kvm.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 850104f6b5..5057dfbd75 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -1924,10 +1924,6 @@ int kvm_arch_init_vcpu(CPUState *cs)
case 0xb:
case 0xd:
for (j = 0; ; j++) {
- if (i == 0xd && j == 64) {
- break;
- }
-
c->function = i;
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
c->index = j;
@@ -1943,7 +1939,12 @@ int kvm_arch_init_vcpu(CPUState *cs)
break;
}
if (i == 0xd && c->eax == 0) {
- continue;
+ if (j < 63) {
+ continue;
+ } else {
+ cpuid_i--;
+ break;
+ }
}
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "cpuid_data is full, no space for "
--
2.41.0.windows.1

View File

@ -0,0 +1,70 @@
From 8c61e09f435ff3a965867b0496f01682d679182f Mon Sep 17 00:00:00 2001
From: Xiaoyao Li <xiaoyao.li@intel.com>
Date: Wed, 14 Aug 2024 03:54:24 -0400
Subject: [PATCH] target/i386: Enable fdp-excptn-only and zero-fcs-fds
commit 7dddc3bb875e7141ab25931d0f30a1c319bc8457 upstream.
- CPUID.(EAX=07H,ECX=0H):EBX[bit 6]: x87 FPU Data Pointer updated only
on x87 exceptions if 1.
- CPUID.(EAX=07H,ECX=0H):EBX[bit 13]: Deprecates FPU CS and FPU DS
values if 1. i.e., X87 FCS and FDS are always zero.
Define names for them so that they can be exposed to guest with -cpu host.
Also define the bit field MACROs so that named cpu models can add it as
well in the future.
Intel-SIG: commit 7dddc3bb875e target/i386: Enable fdp-excptn-only and zero-fcs-fds
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20240814075431.339209-3-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
---
target/i386/cpu.c | 4 ++--
target/i386/cpu.h | 4 ++++
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index dfc0f7fd2d..d0aa2fb5ff 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -906,9 +906,9 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
.type = CPUID_FEATURE_WORD,
.feat_names = {
"fsgsbase", "tsc-adjust", "sgx", "bmi1",
- "hle", "avx2", NULL, "smep",
+ "hle", "avx2", "fdp-excptn-only", "smep",
"bmi2", "erms", "invpcid", "rtm",
- NULL, NULL, "mpx", NULL,
+ NULL, "zero-fcs-fds", "mpx", NULL,
"avx512f", "avx512dq", "rdseed", "adx",
"smap", "avx512ifma", "pcommit", "clflushopt",
"clwb", "intel-pt", "avx512pf", "avx512er",
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index b90182582f..b883e5e1d6 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -809,6 +809,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
#define CPUID_7_0_EBX_HLE (1U << 4)
/* Intel Advanced Vector Extensions 2 */
#define CPUID_7_0_EBX_AVX2 (1U << 5)
+/* FPU data pointer updated only on x87 exceptions */
+#define CPUID_7_0_EBX_FDP_EXCPTN_ONLY (1u << 6)
/* Supervisor-mode Execution Prevention */
#define CPUID_7_0_EBX_SMEP (1U << 7)
/* 2nd Group of Advanced Bit Manipulation Extensions */
@@ -819,6 +821,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
#define CPUID_7_0_EBX_INVPCID (1U << 10)
/* Restricted Transactional Memory */
#define CPUID_7_0_EBX_RTM (1U << 11)
+/* Zero out FPU CS and FPU DS */
+#define CPUID_7_0_EBX_ZERO_FCS_FDS (1U << 13)
/* Memory Protection Extension */
#define CPUID_7_0_EBX_MPX (1U << 14)
/* AVX-512 Foundation */
--
2.41.0.windows.1

View File

@ -0,0 +1,66 @@
From 07a671dc3e3baedb650b307c36d69bef869c2480 Mon Sep 17 00:00:00 2001
From: Xiaoyao Li <xiaoyao.li@intel.com>
Date: Wed, 14 Aug 2024 03:54:31 -0400
Subject: [PATCH] target/i386: Make invtsc migratable when user sets tsc-khz
explicitly
commit 87c88db3143e91076d167a62dd7febf49afca8a2 upstream.
When user sets tsc-frequency explicitly, the invtsc feature is actually
migratable because the tsc-frequency is supposed to be fixed during the
migration.
See commit d99569d9d856 ("kvm: Allow invtsc migration if tsc-khz
is set explicitly") for reference.
Intel-SIG: commit 87c88db3143e target/i386: Make invtsc migratable when user sets tsc-khz explicitly
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20240814075431.339209-10-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
---
target/i386/cpu.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index d0aa2fb5ff..20358ffa91 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1685,9 +1685,10 @@ static inline uint64_t x86_cpu_xsave_xss_components(X86CPU *cpu)
* Returns the set of feature flags that are supported and migratable by
* QEMU, for a given FeatureWord.
*/
-static uint64_t x86_cpu_get_migratable_flags(FeatureWord w)
+static uint64_t x86_cpu_get_migratable_flags(X86CPU *cpu, FeatureWord w)
{
FeatureWordInfo *wi = &feature_word_info[w];
+ CPUX86State *env = &cpu->env;
uint64_t r = 0;
int i;
@@ -1701,6 +1702,12 @@ static uint64_t x86_cpu_get_migratable_flags(FeatureWord w)
r |= f;
}
}
+
+ /* when tsc-khz is set explicitly, invtsc is migratable */
+ if ((w == FEAT_8000_0007_EDX) && env->user_tsc_khz) {
+ r |= CPUID_APM_INVTSC;
+ }
+
return r;
}
@@ -6002,7 +6009,7 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w)
}
#endif
if (cpu && cpu->migratable) {
- r &= x86_cpu_get_migratable_flags(w);
+ r &= x86_cpu_get_migratable_flags(cpu, w);
}
return r;
}
--
2.41.0.windows.1

View File

@ -0,0 +1,66 @@
From 513d33050869a337262fdba0a2d064e7ce9fdb22 Mon Sep 17 00:00:00 2001
From: Lei Wang <lei4.wang@intel.com>
Date: Wed, 7 Aug 2024 01:18:12 -0700
Subject: [PATCH] target/i386: Raise the highest index value used for any VMCS
encoding
commit ab891454ebe82f7e359be721007652556f9f8356 upstream.
Because the index value of the VMCS field encoding of FRED injected-event
data (one of the newly added VMCS fields for FRED transitions), 0x52, is
larger than any existing index value, raise the highest index value used
for any VMCS encoding to 0x52.
Because the index value of the VMCS field encoding of Secondary VM-exit
controls, 0x44, is larger than any existing index value, raise the highest
index value used for any VMCS encoding to 0x44.
Intel-SIG: commit ab891454ebe8 target/i386: Raise the highest index value used for any VMCS encoding
Co-developed-by: Xin Li <xin3.li@intel.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
Signed-off-by: Lei Wang <lei4.wang@intel.com>
Signed-off-by: Xin Li (Intel) <xin@zytor.com>
Link: https://lore.kernel.org/r/20240807081813.735158-4-xin@zytor.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
---
target/i386/cpu.h | 1 +
target/i386/kvm/kvm.c | 9 ++++++++-
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index f022749c86..fb6721f182 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1166,6 +1166,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
#define VMX_VM_EXIT_PT_CONCEAL_PIP 0x01000000
#define VMX_VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000
#define VMX_VM_EXIT_LOAD_IA32_PKRS 0x20000000
+#define VMX_VM_EXIT_ACTIVATE_SECONDARY_CONTROLS 0x80000000
#define VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004
#define VMX_VM_ENTRY_IA32E_MODE 0x00000200
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 5f3497e122..ce96ed9158 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -3254,7 +3254,14 @@ static void kvm_msr_entry_add_vmx(X86CPU *cpu, FeatureWordArray f)
kvm_msr_entry_add(cpu, MSR_IA32_VMX_CR4_FIXED0,
CR4_VMXE_MASK);
- if (f[FEAT_VMX_SECONDARY_CTLS] & VMX_SECONDARY_EXEC_TSC_SCALING) {
+ if (f[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED) {
+ /* FRED injected-event data (0x2052). */
+ kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMCS_ENUM, 0x52);
+ } else if (f[FEAT_VMX_EXIT_CTLS] &
+ VMX_VM_EXIT_ACTIVATE_SECONDARY_CONTROLS) {
+ /* Secondary VM-exit controls (0x2044). */
+ kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMCS_ENUM, 0x44);
+ } else if (f[FEAT_VMX_SECONDARY_CTLS] & VMX_SECONDARY_EXEC_TSC_SCALING) {
/* TSC multiplier (0x2032). */
kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMCS_ENUM, 0x32);
} else {
--
2.41.0.windows.1

View File

@ -0,0 +1,108 @@
From 110184b14d17c13e046e9c4ebed6c3cec29b31d0 Mon Sep 17 00:00:00 2001
From: Xin Li <xin3.li@intel.com>
Date: Wed, 8 Nov 2023 23:20:07 -0800
Subject: [PATCH] target/i386: add support for FRED in CPUID enumeration
commit c1acad9f72d14daf918563eb77d2b31c39fbd06a upstream.
FRED, i.e., the Intel flexible return and event delivery architecture,
defines simple new transitions that change privilege level (ring
transitions).
The new transitions defined by the FRED architecture are FRED event
delivery and, for returning from events, two FRED return instructions.
FRED event delivery can effect a transition from ring 3 to ring 0, but
it is used also to deliver events incident to ring 0. One FRED
instruction (ERETU) effects a return from ring 0 to ring 3, while the
other (ERETS) returns while remaining in ring 0. Collectively, FRED
event delivery and the FRED return instructions are FRED transitions.
In addition to these transitions, the FRED architecture defines a new
instruction (LKGS) for managing the state of the GS segment register.
The LKGS instruction can be used by 64-bit operating systems that do
not use the new FRED transitions.
WRMSRNS is an instruction that behaves exactly like WRMSR, with the
only difference being that it is not a serializing instruction by
default. Under certain conditions, WRMSRNS may replace WRMSR to improve
performance. FRED uses it to switch RSP0 in a faster manner.
Search for the latest FRED spec in most search engines with this search
pattern:
site:intel.com FRED (flexible return and event delivery) specification
The CPUID feature flag CPUID.(EAX=7,ECX=1):EAX[17] enumerates FRED, and
the CPUID feature flag CPUID.(EAX=7,ECX=1):EAX[18] enumerates LKGS, and
the CPUID feature flag CPUID.(EAX=7,ECX=1):EAX[19] enumerates WRMSRNS.
Add CPUID definitions for FRED/LKGS/WRMSRNS, and expose them to KVM guests.
Because FRED relies on LKGS and WRMSRNS, add that to feature dependency
map.
Intel-SIG: commit c1acad9f72d1 target/i386: add support for FRED in CPUID enumeration
Tested-by: Shan Kang <shan.kang@intel.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
Message-ID: <20231109072012.8078-2-xin3.li@intel.com>
[Fix order of dependencies, add dependencies from LM to FRED. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
---
target/i386/cpu.c | 14 +++++++++++++-
target/i386/cpu.h | 6 ++++++
2 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 860934b39f..47f00392be 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -966,7 +966,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
"avx-vnni", "avx512-bf16", NULL, "cmpccxadd",
NULL, NULL, "fzrm", "fsrs",
"fsrc", NULL, NULL, NULL,
- NULL, NULL, NULL, NULL,
+ NULL, "fred", "lkgs", "wrmsrns",
NULL, "amx-fp16", NULL, "avx-ifma",
NULL, NULL, "lam", NULL,
NULL, NULL, NULL, NULL,
@@ -1553,6 +1553,18 @@ static FeatureDep feature_dependencies[] = {
.from = { FEAT_7_0_ECX, CPUID_7_0_ECX_WAITPKG },
.to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE },
},
+ {
+ .from = { FEAT_8000_0001_EDX, CPUID_EXT2_LM },
+ .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED },
+ },
+ {
+ .from = { FEAT_7_1_EAX, CPUID_7_1_EAX_LKGS },
+ .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED },
+ },
+ {
+ .from = { FEAT_7_1_EAX, CPUID_7_1_EAX_WRMSRNS },
+ .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED },
+ },
};
typedef struct X86RegisterInfo32 {
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 21fb769cce..f392626f98 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -941,6 +941,12 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
#define CPUID_7_1_EDX_AMX_COMPLEX (1U << 8)
/* PREFETCHIT0/1 Instructions */
#define CPUID_7_1_EDX_PREFETCHITI (1U << 14)
+/* Flexible return and event delivery (FRED) */
+#define CPUID_7_1_EAX_FRED (1U << 17)
+/* Load into IA32_KERNEL_GS_BASE (LKGS) */
+#define CPUID_7_1_EAX_LKGS (1U << 18)
+/* Non-Serializing Write to Model Specific Register (WRMSRNS) */
+#define CPUID_7_1_EAX_WRMSRNS (1U << 19)
/* Do not exhibit MXCSR Configuration Dependent Timing (MCDT) behavior */
#define CPUID_7_2_EDX_MCDT_NO (1U << 5)
--
2.41.0.windows.1

View File

@ -0,0 +1,62 @@
From 5f828613ba69ce640512a900f630515d980208dd Mon Sep 17 00:00:00 2001
From: Xin Li <xin3.li@intel.com>
Date: Wed, 8 Nov 2023 23:20:11 -0800
Subject: [PATCH] target/i386: enumerate VMX nested-exception support
commit ef202d64c3020f3df03c39d3ad688732d81aaae8 upstream.
Allow VMX nested-exception support to be exposed in KVM guests, thus
nested KVM guests can enumerate it.
Intel-SIG: commit ef202d64c302 target/i386: enumerate VMX nested-exception support
Tested-by: Shan Kang <shan.kang@intel.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
Message-ID: <20231109072012.8078-6-xin3.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
---
scripts/kvm/vmxcap | 1 +
target/i386/cpu.c | 1 +
target/i386/cpu.h | 1 +
3 files changed, 3 insertions(+)
diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap
index 44898d73c2..508be19c75 100755
--- a/scripts/kvm/vmxcap
+++ b/scripts/kvm/vmxcap
@@ -117,6 +117,7 @@ controls = [
54: 'INS/OUTS instruction information',
55: 'IA32_VMX_TRUE_*_CTLS support',
56: 'Skip checks on event error code',
+ 58: 'VMX nested exception support',
},
msr = MSR_IA32_VMX_BASIC,
),
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 47f00392be..00e636e61c 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1344,6 +1344,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
[54] = "vmx-ins-outs",
[55] = "vmx-true-ctls",
[56] = "vmx-any-errcode",
+ [58] = "vmx-nested-exception",
},
.msr = {
.index = MSR_IA32_VMX_BASIC,
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 418daeab04..b03237c305 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1065,6 +1065,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
#define MSR_VMX_BASIC_INS_OUTS (1ULL << 54)
#define MSR_VMX_BASIC_TRUE_CTLS (1ULL << 55)
#define MSR_VMX_BASIC_ANY_ERRCODE (1ULL << 56)
+#define MSR_VMX_BASIC_NESTED_EXCEPTION (1ULL << 58)
#define MSR_VMX_MISC_PREEMPTION_TIMER_SHIFT_MASK 0x1Full
#define MSR_VMX_MISC_STORE_LMA (1ULL << 5)
--
2.41.0.windows.1

View File

@ -0,0 +1,39 @@
From bce44f92530fed18cac1e51f81217a6addf992bd Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 8 May 2024 11:10:54 +0200
Subject: [PATCH] target/i386: fix feature dependency for WAITPKG
commit fe01af5d47d4cf7fdf90c54d43f784e5068c8d72 upstream.
The VMX feature bit depends on general availability of WAITPKG,
not the other way round.
Intel-SIG: commit fe01af5d47d4 target/i386: fix feature dependency for WAITPKG
Fixes: 33cc88261c3 ("target/i386: add support for VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE", 2023-08-28)
Cc: qemu-stable@nongnu.org
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
---
target/i386/cpu.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index f3df62127c..860934b39f 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1550,8 +1550,8 @@ static FeatureDep feature_dependencies[] = {
.to = { FEAT_SVM, ~0ull },
},
{
- .from = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE },
- .to = { FEAT_7_0_ECX, CPUID_7_0_ECX_WAITPKG },
+ .from = { FEAT_7_0_ECX, CPUID_7_0_ECX_WAITPKG },
+ .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE },
},
};
--
2.41.0.windows.1

View File

@ -0,0 +1,67 @@
From 1a2ee56c173984212ba7b9970aa36e307094d460 Mon Sep 17 00:00:00 2001
From: Xin Li <xin3.li@intel.com>
Date: Wed, 8 Nov 2023 23:20:08 -0800
Subject: [PATCH] target/i386: mark CR4.FRED not reserved
commit f88ddc40c6d8b591a357108feec52cea13796d2d upstream.
The CR4.FRED bit, i.e., CR4[32], is no longer a reserved bit when FRED
is exposed to guests, otherwise it is still a reserved bit.
Intel-SIG: commit f88ddc40c6d8 target/i386: mark CR4.FRED not reserved
Tested-by: Shan Kang <shan.kang@intel.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Message-ID: <20231109072012.8078-3-xin3.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
---
target/i386/cpu.h | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index f392626f98..418daeab04 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -264,6 +264,18 @@ typedef enum X86Seg {
#define CR4_PKS_MASK (1U << 24)
#define CR4_LAM_SUP_MASK (1U << 28)
+#ifdef TARGET_X86_64
+#define CR4_FRED_MASK (1ULL << 32)
+#else
+#define CR4_FRED_MASK 0
+#endif
+
+#ifdef TARGET_X86_64
+#define CR4_FRED_MASK (1ULL << 32)
+#else
+#define CR4_FRED_MASK 0
+#endif
+
#define CR4_RESERVED_MASK \
(~(target_ulong)(CR4_VME_MASK | CR4_PVI_MASK | CR4_TSD_MASK \
| CR4_DE_MASK | CR4_PSE_MASK | CR4_PAE_MASK \
@@ -272,7 +284,7 @@ typedef enum X86Seg {
| CR4_LA57_MASK \
| CR4_FSGSBASE_MASK | CR4_PCIDE_MASK | CR4_OSXSAVE_MASK \
| CR4_SMEP_MASK | CR4_SMAP_MASK | CR4_PKE_MASK | CR4_PKS_MASK \
- | CR4_LAM_SUP_MASK))
+ | CR4_LAM_SUP_MASK | CR4_FRED_MASK))
#define DR6_BD (1 << 13)
#define DR6_BS (1 << 14)
@@ -2551,6 +2563,9 @@ static inline uint64_t cr4_reserved_bits(CPUX86State *env)
if (!(env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_LAM)) {
reserved_bits |= CR4_LAM_SUP_MASK;
}
+ if (!(env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED)) {
+ reserved_bits |= CR4_FRED_MASK;
+ }
return reserved_bits;
}
--
2.41.0.windows.1

View File

@ -0,0 +1,108 @@
From bd6fec2cb2bb811aa73a2a6e6da45c76ecded49c Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 27 Jun 2024 01:12:42 +0200
Subject: [PATCH] target/i386: pass X86CPU to
x86_cpu_get_supported_feature_word
commit 8dee38483274bd0fcf3f74dea024d719b958200d upstream.
This allows modifying the bits in "-cpu max"/"-cpu host" depending on
the guest CPU vendor (which, at least by default, is the host vendor in
the case of KVM).
For example, machine check architecture differs between Intel and AMD,
and bits from AMD should be dropped when configuring the guest for
an Intel model.
Intel-SIG: commit 8dee38483274 target/i386: pass X86CPU to x86_cpu_get_supported_feature_word
Cc: Xiaoyao Li <xiaoyao.li@intel.com>
Cc: John Allen <john.allen@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
---
target/i386/cpu.c | 11 +++++------
target/i386/cpu.h | 3 +--
target/i386/kvm/kvm-cpu.c | 2 +-
3 files changed, 7 insertions(+), 9 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index f80570f4da..dfc0f7fd2d 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -5959,8 +5959,7 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp)
#endif /* !CONFIG_USER_ONLY */
-uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
- bool migratable_only)
+uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w)
{
FeatureWordInfo *wi = &feature_word_info[w];
uint64_t r = 0;
@@ -6002,7 +6001,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
r &= ~unavail;
}
#endif
- if (migratable_only) {
+ if (cpu && cpu->migratable) {
r &= x86_cpu_get_migratable_flags(w);
}
return r;
@@ -7324,7 +7323,7 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
* by the user.
*/
env->features[w] |=
- x86_cpu_get_supported_feature_word(w, cpu->migratable) &
+ x86_cpu_get_supported_feature_word(cpu, w) &
~env->user_features[w] &
~feature_word_info[w].no_autoenable_flags;
}
@@ -7450,7 +7449,7 @@ static void x86_cpu_filter_features(X86CPU *cpu, bool verbose)
for (w = 0; w < FEATURE_WORDS; w++) {
uint64_t host_feat =
- x86_cpu_get_supported_feature_word(w, false);
+ x86_cpu_get_supported_feature_word(NULL, w);
uint64_t requested_features = env->features[w];
uint64_t unavailable_features = requested_features & ~host_feat;
mark_unavailable_features(cpu, w, unavailable_features, prefix);
@@ -7566,7 +7565,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
env->features[FEAT_PERF_CAPABILITIES] & PERF_CAP_LBR_FMT;
if (requested_lbr_fmt && kvm_enabled()) {
uint64_t host_perf_cap =
- x86_cpu_get_supported_feature_word(FEAT_PERF_CAPABILITIES, false);
+ x86_cpu_get_supported_feature_word(NULL, FEAT_PERF_CAPABILITIES);
unsigned host_lbr_fmt = host_perf_cap & PERF_CAP_LBR_FMT;
if (!cpu->enable_pmu) {
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index fb6721f182..b90182582f 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -655,8 +655,7 @@ typedef enum FeatureWord {
} FeatureWord;
typedef uint64_t FeatureWordArray[FEATURE_WORDS];
-uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
- bool migratable_only);
+uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
/* cpuid_features bits */
#define CPUID_FP87 (1U << 0)
diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
index f76972e47e..a3bc8d8f83 100644
--- a/target/i386/kvm/kvm-cpu.c
+++ b/target/i386/kvm/kvm-cpu.c
@@ -137,7 +137,7 @@ static void kvm_cpu_xsave_init(void)
if (!esa->size) {
continue;
}
- if ((x86_cpu_get_supported_feature_word(esa->feature, false) & esa->bits)
+ if ((x86_cpu_get_supported_feature_word(NULL, esa->feature) & esa->bits)
!= esa->bits) {
continue;
}
--
2.41.0.windows.1

View File

@ -0,0 +1,39 @@
From 52cc8f5a9ba854268a58402d351d2fd43dddb1b4 Mon Sep 17 00:00:00 2001
From: qihao_yewu <qihao_yewu@cmss.chinamobile.com>
Date: Mon, 7 Apr 2025 17:54:20 -0400
Subject: [PATCH] target/s390x: Fix a typo in s390_cpu_class_init()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
cherry-pick from 6a93b1c7b4cfa4f5e3c0b8a17177ce14aaa2346c
Replace the comma at the end of the line by a semicolon.
Fixes: 41868f846d2 ("s390x/cpumodel: "host" and "qemu" as CPU subclasses")
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250324165356.39540-1-philmd@linaro.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: qihao_yewu <qihao_yewu@cmss.chinamobile.com>
---
target/s390x/cpu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index 6acfa1c91b..5e64f24cc2 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -350,7 +350,7 @@ static void s390_cpu_class_init(ObjectClass *oc, void *data)
device_class_set_parent_reset(dc, s390_cpu_reset_full, &scc->parent_reset);
scc->reset = s390_cpu_reset;
- cc->class_by_name = s390_cpu_class_by_name,
+ cc->class_by_name = s390_cpu_class_by_name;
cc->has_work = s390_cpu_has_work;
cc->dump_state = s390_cpu_dump_state;
cc->query_cpu_fast = s390_query_cpu_fast;
--
2.41.0.windows.1

View File

@ -0,0 +1,70 @@
From 4a065d0fbbe159dfbc073e4480434d6889b7c5a4 Mon Sep 17 00:00:00 2001
From: caijian <caijian11@h-partners.com>
Date: Mon, 31 Mar 2025 15:03:02 +0800
Subject: [PATCH] tests/data/acpi: Update DSDT acpi tables
- * Disassembly of tests/data/acpi/virt/DSDT, Fri Mar 28 16:43:04 2025
+ * Disassembly of /tmp/aml-1KF432, Fri Mar 28 16:43:04 2025
*
* Original Table Header:
* Signature "DSDT"
* Length 0x000016B6 (5814)
* Revision 0x02
- * Checksum 0x46
+ * Checksum 0x47
* OEM ID "BOCHS "
* OEM Table ID "BXPC "
* OEM Revision 0x00000001 (1)
* Compiler ID "BXPC"
* Compiler Version 0x00000001 (1)
*/
DefinitionBlock ("", "DSDT", 2, "BOCHS ", "BXPC ", 0x00000001)
@@ -2090,33 +2090,33 @@
}
Else
{
CDW1 |= 0x04
Return (Arg3)
}
}
Method (_DSM, 4, NotSerialized) // _DSM: Device-Specific Method
{
If ((Arg0 == ToUUID ("e5c937d0-3553-4d7a-9117-ea4d19c3434d") /* Device Labeling Interface */))
{
If ((Arg2 == Zero))
{
Return (Buffer (One)
{
- 0x01 // .
+ 0x00 // .
})
}
}
Return (Buffer (One)
{
0x00
})
}
Signed-off-by: caijian <caijian11@h-partners.com>
---
tests/qtest/bios-tables-test-allowed-diff.h | 6 ------
1 file changed, 6 deletions(-)
diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h
index e4a94bb8bd..dfb8523c8b 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1,7 +1 @@
/* List of comma-separated changed AML files to ignore */
-"tests/data/acpi/microvm/DSDT.pcie",
-"tests/data/acpi/virt/DSDT",
-"tests/data/acpi/virt/DSDT.acpihmatvirt",
-"tests/data/acpi/virt/DSDT.memhp",
-"tests/data/acpi/virt/DSDT.pxb",
-"tests/data/acpi/virt/DSDT.topology",
--
2.41.0.windows.1

View File

@ -0,0 +1,76 @@
From bf12438e93f2d55aac6245f6a9f77f51b6fd2d8a Mon Sep 17 00:00:00 2001
From: caijian <caijian11@h-partners.com>
Date: Mon, 31 Mar 2025 15:06:24 +0800
Subject: [PATCH] tests/data/acpi/virt: Update IORT acpi table
- * Disassembly of tests/data/acpi/virt/IORT, Fri Mar 28 18:05:37 2025
+ * Disassembly of /tmp/aml-9R3932, Fri Mar 28 18:05:37 2025
*
* ACPI Data Table [IORT]
*
* Format: [HexOffset DecimalOffset ByteLength] FieldName : FieldValue
*/
[000h 0000 4] Signature : "IORT" [IO Remapping Table]
[004h 0004 4] Table Length : 00000080
-[008h 0008 1] Revision : 03
-[009h 0009 1] Checksum : B3
+[008h 0008 1] Revision : 05
+[009h 0009 1] Checksum : AE
[00Ah 0010 6] Oem ID : "BOCHS "
[010h 0016 8] Oem Table ID : "BXPC "
[018h 0024 4] Oem Revision : 00000001
[01Ch 0028 4] Asl Compiler ID : "BXPC"
[020h 0032 4] Asl Compiler Revision : 00000001
@@ -45,32 +45,32 @@
[058h 0088 4] Cache Coherency : 00000001
[05Ch 0092 1] Hints (decoded below) : 00
Transient : 0
Write Allocate : 0
Read Allocate : 0
Override : 0
[05Dh 0093 2] Reserved : 0000
[05Fh 0095 1] Memory Flags (decoded below) : 03
Coherency : 1
Device Attribute : 1
[060h 0096 4] ATS Attribute : 00000000
[064h 0100 4] PCI Segment Number : 00000000
[068h 0104 1] Memory Size Limit : 40
[069h 0105 3] Reserved : 000000
[06Ch 0108 4] Input base : 00000000
-[070h 0112 4] ID Count : 0000FFFF
+[070h 0112 4] ID Count : 00010000
[074h 0116 4] Output Base : 00000000
[078h 0120 4] Output Reference : 00000030
[07Ch 0124 4] Flags (decoded below) : 00000000
Single Mapping : 0
Raw Table Data: Length 128 (0x80)
- 0000: 49 4F 52 54 80 00 00 00 03 B3 42 4F 43 48 53 20 // IORT......BOCHS
+ 0000: 49 4F 52 54 80 00 00 00 05 AE 42 4F 43 48 53 20 // IORT......BOCHS
0010: 42 58 50 43 20 20 20 20 01 00 00 00 42 58 50 43 // BXPC ....BXPC
0020: 01 00 00 00 02 00 00 00 30 00 00 00 00 00 00 00 // ........0.......
0030: 00 18 00 01 00 00 00 00 00 00 00 00 00 00 00 00 // ................
0040: 01 00 00 00 00 00 00 00 02 38 00 03 01 00 00 00 // .........8......
0050: 01 00 00 00 24 00 00 00 01 00 00 00 00 00 00 03 // ....$...........
0060: 00 00 00 00 00 00 00 00 40 00 00 00 00 00 00 00 // ........@.......
- 0070: FF FF 00 00 00 00 00 00 30 00 00 00 00 00 00 00 // ........0.......
+ 0070: 00 00 01 00 00 00 00 00 30 00 00 00 00 00 00 00 // ........0.......
Signed-off-by: caijian <caijian11@h-partners.com>
---
tests/qtest/bios-tables-test-allowed-diff.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h
index 9a5a923d6b..dfb8523c8b 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1,2 +1 @@
/* List of comma-separated changed AML files to ignore */
-"tests/data/acpi/virt/IORT",
--
2.41.0.windows.1

View File

@ -0,0 +1,27 @@
From ea23e4215b332446d4964769d004f7a11caba00b Mon Sep 17 00:00:00 2001
From: caijian <caijian11@h-partners.com>
Date: Mon, 31 Mar 2025 15:02:37 +0800
Subject: [PATCH] tests/qtest: Allow DSDT acpi tables to change
List all DSDT files and allow them to change.
Signed-off-by: caijian <caijian11@h-partners.com>
---
tests/qtest/bios-tables-test-allowed-diff.h | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h
index dfb8523c8b..e4a94bb8bd 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1 +1,7 @@
/* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/microvm/DSDT.pcie",
+"tests/data/acpi/virt/DSDT",
+"tests/data/acpi/virt/DSDT.acpihmatvirt",
+"tests/data/acpi/virt/DSDT.memhp",
+"tests/data/acpi/virt/DSDT.pxb",
+"tests/data/acpi/virt/DSDT.topology",
--
2.41.0.windows.1

View File

@ -0,0 +1,22 @@
From ca17fd9b9e608e0a6e8a948ccf46fa020c12f510 Mon Sep 17 00:00:00 2001
From: caijian <caijian11@h-partners.com>
Date: Mon, 31 Mar 2025 15:06:13 +0800
Subject: [PATCH] tests/qtest: Allow IORT acpi table to change
List changed IORT file and allow it to change.
Signed-off-by: caijian <caijian11@h-partners.com>
---
tests/qtest/bios-tables-test-allowed-diff.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h
index dfb8523c8b..9a5a923d6b 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1 +1,2 @@
/* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/virt/IORT",
--
2.41.0.windows.1

View File

@ -0,0 +1,167 @@
From 90688ff9c5802965f24460ac79fe52b93d2adb1f Mon Sep 17 00:00:00 2001
From: Yi Liu <yi.l.liu@intel.com>
Date: Sat, 11 Jan 2025 10:52:38 +0800
Subject: [PATCH] util/char_dev: Add open_cdev()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
/dev/vfio/devices/vfioX may not exist. In that case it is still possible
to open /dev/char/$major:$minor instead. Add helper function to abstract
the cdev open.
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Zhou Wang <wangzhou1@hisilicon.com>
---
MAINTAINERS | 2 +
include/qemu/chardev_open.h | 16 ++++++++
util/chardev_open.c | 81 +++++++++++++++++++++++++++++++++++++
util/meson.build | 1 +
4 files changed, 100 insertions(+)
create mode 100644 include/qemu/chardev_open.h
create mode 100644 util/chardev_open.c
diff --git a/MAINTAINERS b/MAINTAINERS
index a5a446914a..ca70bb4e64 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2174,6 +2174,8 @@ M: Zhenzhong Duan <zhenzhong.duan@intel.com>
S: Supported
F: backends/iommufd.c
F: include/sysemu/iommufd.h
+F: include/qemu/chardev_open.h
+F: util/chardev_open.c
vhost
M: Michael S. Tsirkin <mst@redhat.com>
diff --git a/include/qemu/chardev_open.h b/include/qemu/chardev_open.h
new file mode 100644
index 0000000000..64e8fcfdcb
--- /dev/null
+++ b/include/qemu/chardev_open.h
@@ -0,0 +1,16 @@
+/*
+ * QEMU Chardev Helper
+ *
+ * Copyright (C) 2023 Intel Corporation.
+ *
+ * Authors: Yi Liu <yi.l.liu@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_CHARDEV_OPEN_H
+#define QEMU_CHARDEV_OPEN_H
+
+int open_cdev(const char *devpath, dev_t cdev);
+#endif
diff --git a/util/chardev_open.c b/util/chardev_open.c
new file mode 100644
index 0000000000..f776429788
--- /dev/null
+++ b/util/chardev_open.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2019, Mellanox Technologies. All rights reserved.
+ * Copyright (C) 2023 Intel Corporation.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Yi Liu <yi.l.liu@intel.com>
+ *
+ * Copied from
+ * https://github.com/linux-rdma/rdma-core/blob/master/util/open_cdev.c
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/chardev_open.h"
+
+static int open_cdev_internal(const char *path, dev_t cdev)
+{
+ struct stat st;
+ int fd;
+
+ fd = qemu_open_old(path, O_RDWR);
+ if (fd == -1) {
+ return -1;
+ }
+ if (fstat(fd, &st) || !S_ISCHR(st.st_mode) ||
+ (cdev != 0 && st.st_rdev != cdev)) {
+ close(fd);
+ return -1;
+ }
+ return fd;
+}
+
+static int open_cdev_robust(dev_t cdev)
+{
+ g_autofree char *devpath = NULL;
+
+ /*
+ * This assumes that udev is being used and is creating the /dev/char/
+ * symlinks.
+ */
+ devpath = g_strdup_printf("/dev/char/%u:%u", major(cdev), minor(cdev));
+ return open_cdev_internal(devpath, cdev);
+}
+
+int open_cdev(const char *devpath, dev_t cdev)
+{
+ int fd;
+
+ fd = open_cdev_internal(devpath, cdev);
+ if (fd == -1 && cdev != 0) {
+ return open_cdev_robust(cdev);
+ }
+ return fd;
+}
diff --git a/util/meson.build b/util/meson.build
index c2322ef6e7..174c133368 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -108,6 +108,7 @@ if have_block
util_ss.add(files('filemonitor-stub.c'))
endif
util_ss.add(when: 'CONFIG_LINUX', if_true: files('vfio-helpers.c'))
+ util_ss.add(when: 'CONFIG_LINUX', if_true: files('chardev_open.c'))
endif
if cpu == 'aarch64'
--
2.41.0.windows.1

View File

@ -0,0 +1,124 @@
From a152921f6d534f2a515b4e88304ad115fae8fa8f Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
Date: Wed, 5 Jun 2024 16:30:37 +0800
Subject: [PATCH] vfio: Create host IOMMU device instance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Create host IOMMU device instance in vfio_attach_device() and call
.realize() to initialize it further.
Introduce attribute VFIOIOMMUClass::hiod_typename and initialize
it based on VFIO backend type. It will facilitate HostIOMMUDevice
creation in vfio_attach_device().
Suggested-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/vfio/common.c | 18 +++++++++++++++++-
hw/vfio/container.c | 2 ++
hw/vfio/iommufd.c | 2 ++
include/hw/vfio/vfio-common.h | 1 +
include/hw/vfio/vfio-container-base.h | 3 +++
5 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index b5d02df0c2..d5ff65f90a 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1650,6 +1650,8 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev,
{
const VFIOIOMMUClass *ops =
VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
+ HostIOMMUDevice *hiod = NULL;
+ int ret;
if (vbasedev->iommufd) {
ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
@@ -1657,7 +1659,20 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev,
assert(ops);
- return ops->attach_device(name, vbasedev, as, errp);
+ ret = ops->attach_device(name, vbasedev, as, errp);
+ if (ret) {
+ return ret;
+ }
+
+ hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
+ if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp)) {
+ object_unref(hiod);
+ ops->detach_device(vbasedev);
+ return -1;
+ }
+ vbasedev->hiod = hiod;
+
+ return 0;
}
void vfio_detach_device(VFIODevice *vbasedev)
@@ -1665,5 +1680,6 @@ void vfio_detach_device(VFIODevice *vbasedev)
if (!vbasedev->bcontainer) {
return;
}
+ object_unref(vbasedev->hiod);
vbasedev->bcontainer->ops->detach_device(vbasedev);
}
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index ed54ce6d0c..10f7635425 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -1240,6 +1240,8 @@ static void vfio_iommu_legacy_class_init(ObjectClass *klass, void *data)
{
VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
+ vioc->hiod_typename = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO;
+
vioc->setup = vfio_legacy_setup;
vioc->dma_map = vfio_legacy_dma_map;
vioc->dma_unmap = vfio_legacy_dma_unmap;
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 2efdba5565..7cbf0e44f1 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -629,6 +629,8 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data)
{
VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
+ vioc->hiod_typename = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO;
+
vioc->dma_map = iommufd_cdev_map;
vioc->dma_unmap = iommufd_cdev_unmap;
vioc->attach_device = iommufd_cdev_attach;
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 376b8350b9..d45d40c329 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -140,6 +140,7 @@ typedef struct VFIODevice {
OnOffAuto pre_copy_dirty_page_tracking;
bool dirty_pages_supported;
bool dirty_tracking;
+ HostIOMMUDevice *hiod;
int devid;
IOMMUFDBackend *iommufd;
} VFIODevice;
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
index b2813b0c11..7a4c575115 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -109,6 +109,9 @@ DECLARE_CLASS_CHECKERS(VFIOIOMMUClass, VFIO_IOMMU, TYPE_VFIO_IOMMU)
struct VFIOIOMMUClass {
InterfaceClass parent_class;
+ /* Properties */
+ const char *hiod_typename;
+
/* basic feature */
int (*setup)(VFIOContainerBase *bcontainer, Error **errp);
int (*dma_map)(const VFIOContainerBase *bcontainer,
--
2.41.0.windows.1

View File

@ -0,0 +1,145 @@
From 65c5381ba3ce5f062f0be9aa796e68b8a9d6bb3c Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
Date: Sat, 11 Jan 2025 10:53:02 +0800
Subject: [PATCH] vfio: Introduce a helper function to initialize VFIODevice
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Introduce a helper function to replace the common code to initialize
VFIODevice in pci, platform, ap and ccw VFIO device.
No functional change intended.
Suggested-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Zhou Wang <wangzhou1@hisilicon.com>
---
hw/vfio/ap.c | 8 ++------
hw/vfio/ccw.c | 8 ++------
hw/vfio/helpers.c | 11 +++++++++++
hw/vfio/pci.c | 6 ++----
hw/vfio/platform.c | 6 ++----
include/hw/vfio/vfio-common.h | 2 ++
6 files changed, 21 insertions(+), 20 deletions(-)
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
index 95fe7cd98b..e157aa1ff7 100644
--- a/hw/vfio/ap.c
+++ b/hw/vfio/ap.c
@@ -226,18 +226,14 @@ static void vfio_ap_instance_init(Object *obj)
VFIOAPDevice *vapdev = VFIO_AP_DEVICE(obj);
VFIODevice *vbasedev = &vapdev->vdev;
- vbasedev->type = VFIO_DEVICE_TYPE_AP;
- vbasedev->ops = &vfio_ap_ops;
- vbasedev->dev = DEVICE(vapdev);
- vbasedev->fd = -1;
-
/*
* vfio-ap devices operate in a way compatible with discarding of
* memory in RAM blocks, as no pages are pinned in the host.
* This needs to be set before vfio_get_device() for vfio common to
* handle ram_block_discard_disable().
*/
- vbasedev->ram_block_discard_allowed = true;
+ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_AP, &vfio_ap_ops,
+ DEVICE(vapdev), true);
}
#ifdef CONFIG_IOMMUFD
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index 6305a4c1b8..90e4a53437 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -683,11 +683,6 @@ static void vfio_ccw_instance_init(Object *obj)
VFIOCCWDevice *vcdev = VFIO_CCW(obj);
VFIODevice *vbasedev = &vcdev->vdev;
- vbasedev->type = VFIO_DEVICE_TYPE_CCW;
- vbasedev->ops = &vfio_ccw_ops;
- vbasedev->dev = DEVICE(vcdev);
- vbasedev->fd = -1;
-
/*
* All vfio-ccw devices are believed to operate in a way compatible with
* discarding of memory in RAM blocks, ie. pages pinned in the host are
@@ -696,7 +691,8 @@ static void vfio_ccw_instance_init(Object *obj)
* needs to be set before vfio_get_device() for vfio common to handle
* ram_block_discard_disable().
*/
- vbasedev->ram_block_discard_allowed = true;
+ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_CCW, &vfio_ccw_ops,
+ DEVICE(vcdev), true);
}
#ifdef CONFIG_IOMMUFD
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index 3592c3d54e..6789870802 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -652,3 +652,14 @@ void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
}
vbasedev->fd = fd;
}
+
+/*
+ * Fill in the VFIODevice fields shared by the pci, platform, ap and ccw
+ * instance_init functions.
+ *
+ * @vbasedev: device structure to initialize
+ * @type: stored in vbasedev->type (callers pass a VFIO_DEVICE_TYPE_* value)
+ * @ops: stored in vbasedev->ops
+ * @dev: stored in vbasedev->dev
+ * @ram_discard: stored in vbasedev->ram_block_discard_allowed
+ */
+void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
+ DeviceState *dev, bool ram_discard)
+{
+ vbasedev->type = type;
+ vbasedev->ops = ops;
+ vbasedev->dev = dev;
+ /* NOTE(review): -1 presumably marks "no device fd yet" - confirm. */
+ vbasedev->fd = -1;
+
+ vbasedev->ram_block_discard_allowed = ram_discard;
+}
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 87405584d7..1874ec1aba 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3327,10 +3327,8 @@ static void vfio_instance_init(Object *obj)
vdev->host.slot = ~0U;
vdev->host.function = ~0U;
- vbasedev->type = VFIO_DEVICE_TYPE_PCI;
- vbasedev->ops = &vfio_pci_ops;
- vbasedev->dev = DEVICE(vdev);
- vbasedev->fd = -1;
+ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_pci_ops,
+ DEVICE(vdev), false);
vdev->nv_gpudirect_clique = 0xFF;
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index 506eb8193f..a8d9b7da63 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -657,10 +657,8 @@ static void vfio_platform_instance_init(Object *obj)
VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj);
VFIODevice *vbasedev = &vdev->vbasedev;
- vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM;
- vbasedev->ops = &vfio_platform_ops;
- vbasedev->dev = DEVICE(vdev);
- vbasedev->fd = -1;
+ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PLATFORM, &vfio_platform_ops,
+ DEVICE(vdev), false);
}
#ifdef CONFIG_IOMMUFD
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 37f01410d5..151b2ab65f 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -271,4 +271,6 @@ int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
/* Returns 0 on success, or a negative errno. */
int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
+void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
+ DeviceState *dev, bool ram_discard);
#endif /* HW_VFIO_VFIO_COMMON_H */
--
2.41.0.windows.1

Some files were not shown because too many files have changed in this diff Show More