!343 Add migration support for VFIO devices
From: @imxcc Reviewed-by: @kevinzhu1 Signed-off-by: @kevinzhu1
This commit is contained in:
commit
3308e532d1
136
hw-net-fix-vmxnet3-live-migration.patch
Normal file
136
hw-net-fix-vmxnet3-live-migration.patch
Normal file
@ -0,0 +1,136 @@
|
||||
From b8b9f58ee5d3cff0a1e7cca770fe632043efb728 Mon Sep 17 00:00:00 2001
|
||||
From: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
|
||||
Date: Fri, 5 Jul 2019 04:07:11 +0300
|
||||
Subject: [PATCH] hw/net: fix vmxnet3 live migration
|
||||
|
||||
At some point vmxnet3 live migration stopped working and git-bisect
|
||||
didn't help finding a working version.
|
||||
The issue is the PCI configuration space is not being migrated
|
||||
successfully and MSIX remains masked at destination.
|
||||
|
||||
Remove the migration differentiation between PCI and PCIe since
|
||||
the logic resides now inside VMSTATE_PCI_DEVICE.
|
||||
Remove also the VMXNET3_COMPAT_FLAG_DISABLE_PCIE based differentiation
|
||||
since at 'realize' time is decided if the device is PCI or PCIe,
|
||||
then the above macro is enough.
|
||||
|
||||
Use the opportunity to move to the standard VMSTATE_MSIX
|
||||
instead of the deprecated SaveVMHandlers.
|
||||
|
||||
Signed-off-by: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
|
||||
Message-Id: <20190705010711.23277-1-marcel.apfelbaum@gmail.com>
|
||||
Tested-by: Sukrit Bhatnagar <skrtbhtngr@gmail.com>
|
||||
Reviewed-by: Dmitry Fleytman <dmitry.fleytman@gmail.com>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
---
|
||||
hw/net/vmxnet3.c | 52 ++----------------------------------------------
|
||||
1 file changed, 2 insertions(+), 50 deletions(-)
|
||||
|
||||
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
|
||||
index ecc4f5bcf0..bf8e6ca4c9 100644
|
||||
--- a/hw/net/vmxnet3.c
|
||||
+++ b/hw/net/vmxnet3.c
|
||||
@@ -2153,21 +2153,6 @@ vmxnet3_cleanup_msi(VMXNET3State *s)
|
||||
msi_uninit(d);
|
||||
}
|
||||
|
||||
-static void
|
||||
-vmxnet3_msix_save(QEMUFile *f, void *opaque)
|
||||
-{
|
||||
- PCIDevice *d = PCI_DEVICE(opaque);
|
||||
- msix_save(d, f);
|
||||
-}
|
||||
-
|
||||
-static int
|
||||
-vmxnet3_msix_load(QEMUFile *f, void *opaque, int version_id)
|
||||
-{
|
||||
- PCIDevice *d = PCI_DEVICE(opaque);
|
||||
- msix_load(d, f);
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
static const MemoryRegionOps b0_ops = {
|
||||
.read = vmxnet3_io_bar0_read,
|
||||
.write = vmxnet3_io_bar0_write,
|
||||
@@ -2188,11 +2173,6 @@ static const MemoryRegionOps b1_ops = {
|
||||
},
|
||||
};
|
||||
|
||||
-static SaveVMHandlers savevm_vmxnet3_msix = {
|
||||
- .save_state = vmxnet3_msix_save,
|
||||
- .load_state = vmxnet3_msix_load,
|
||||
-};
|
||||
-
|
||||
static uint64_t vmxnet3_device_serial_num(VMXNET3State *s)
|
||||
{
|
||||
uint64_t dsn_payload;
|
||||
@@ -2215,7 +2195,6 @@ static uint64_t vmxnet3_device_serial_num(VMXNET3State *s)
|
||||
|
||||
static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp)
|
||||
{
|
||||
- DeviceState *dev = DEVICE(pci_dev);
|
||||
VMXNET3State *s = VMXNET3(pci_dev);
|
||||
int ret;
|
||||
|
||||
@@ -2261,8 +2240,6 @@ static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp)
|
||||
pcie_dev_ser_num_init(pci_dev, VMXNET3_DSN_OFFSET,
|
||||
vmxnet3_device_serial_num(s));
|
||||
}
|
||||
-
|
||||
- register_savevm_live(dev, "vmxnet3-msix", -1, 1, &savevm_vmxnet3_msix, s);
|
||||
}
|
||||
|
||||
static void vmxnet3_instance_init(Object *obj)
|
||||
@@ -2452,29 +2429,6 @@ static const VMStateDescription vmstate_vmxnet3_int_state = {
|
||||
}
|
||||
};
|
||||
|
||||
-static bool vmxnet3_vmstate_need_pcie_device(void *opaque)
|
||||
-{
|
||||
- VMXNET3State *s = VMXNET3(opaque);
|
||||
-
|
||||
- return !(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE);
|
||||
-}
|
||||
-
|
||||
-static bool vmxnet3_vmstate_test_pci_device(void *opaque, int version_id)
|
||||
-{
|
||||
- return !vmxnet3_vmstate_need_pcie_device(opaque);
|
||||
-}
|
||||
-
|
||||
-static const VMStateDescription vmstate_vmxnet3_pcie_device = {
|
||||
- .name = "vmxnet3/pcie",
|
||||
- .version_id = 1,
|
||||
- .minimum_version_id = 1,
|
||||
- .needed = vmxnet3_vmstate_need_pcie_device,
|
||||
- .fields = (VMStateField[]) {
|
||||
- VMSTATE_PCI_DEVICE(parent_obj, VMXNET3State),
|
||||
- VMSTATE_END_OF_LIST()
|
||||
- }
|
||||
-};
|
||||
-
|
||||
static const VMStateDescription vmstate_vmxnet3 = {
|
||||
.name = "vmxnet3",
|
||||
.version_id = 1,
|
||||
@@ -2482,9 +2436,8 @@ static const VMStateDescription vmstate_vmxnet3 = {
|
||||
.pre_save = vmxnet3_pre_save,
|
||||
.post_load = vmxnet3_post_load,
|
||||
.fields = (VMStateField[]) {
|
||||
- VMSTATE_STRUCT_TEST(parent_obj, VMXNET3State,
|
||||
- vmxnet3_vmstate_test_pci_device, 0,
|
||||
- vmstate_pci_device, PCIDevice),
|
||||
+ VMSTATE_PCI_DEVICE(parent_obj, VMXNET3State),
|
||||
+ VMSTATE_MSIX(parent_obj, VMXNET3State),
|
||||
VMSTATE_BOOL(rx_packets_compound, VMXNET3State),
|
||||
VMSTATE_BOOL(rx_vlan_stripping, VMXNET3State),
|
||||
VMSTATE_BOOL(lro_supported, VMXNET3State),
|
||||
@@ -2520,7 +2473,6 @@ static const VMStateDescription vmstate_vmxnet3 = {
|
||||
},
|
||||
.subsections = (const VMStateDescription*[]) {
|
||||
&vmxstate_vmxnet3_mcast_list,
|
||||
- &vmstate_vmxnet3_pcie_device,
|
||||
NULL
|
||||
}
|
||||
};
|
||||
--
|
||||
2.27.0
|
||||
|
||||
1551
include-Make-headers-more-self-contained.patch
Normal file
1551
include-Make-headers-more-self-contained.patch
Normal file
File diff suppressed because it is too large
Load Diff
517
linux-headers-Update-against-Add-migration-support-f.patch
Normal file
517
linux-headers-Update-against-Add-migration-support-f.patch
Normal file
@ -0,0 +1,517 @@
|
||||
From 7ab9ce4016ec48e0af8010f742ee39fc84342d00 Mon Sep 17 00:00:00 2001
|
||||
From: Jinhao Gao <gaojinhao@huawei.com>
|
||||
Date: Fri, 23 Jul 2021 14:55:12 +0800
|
||||
Subject: [PATCH] linux headers: Update against "Add migration support for VFIO
|
||||
devices"
|
||||
|
||||
Update linux-headers/linux/vfio.h against Linux 5.9-rc7 for the
|
||||
VFIO migration support series.
|
||||
|
||||
Signed-off-by: Jinhao Gao <gaojinhao@huawei.com>
|
||||
Signed-off-by: Shenming Lu <lushenming@huawei.com>
|
||||
---
|
||||
linux-headers/linux/vfio.h | 420 +++++++++++++++++++++++++++++++++++--
|
||||
1 file changed, 405 insertions(+), 15 deletions(-)
|
||||
|
||||
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
|
||||
index 24f505199f..a90672494d 100644
|
||||
--- a/linux-headers/linux/vfio.h
|
||||
+++ b/linux-headers/linux/vfio.h
|
||||
@@ -295,15 +295,39 @@ struct vfio_region_info_cap_type {
|
||||
__u32 subtype; /* type specific */
|
||||
};
|
||||
|
||||
+/*
|
||||
+ * List of region types, global per bus driver.
|
||||
+ * If you introduce a new type, please add it here.
|
||||
+ */
|
||||
+
|
||||
+/* PCI region type containing a PCI vendor part */
|
||||
#define VFIO_REGION_TYPE_PCI_VENDOR_TYPE (1 << 31)
|
||||
#define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff)
|
||||
+#define VFIO_REGION_TYPE_GFX (1)
|
||||
+#define VFIO_REGION_TYPE_CCW (2)
|
||||
+#define VFIO_REGION_TYPE_MIGRATION (3)
|
||||
+
|
||||
+/* sub-types for VFIO_REGION_TYPE_PCI_* */
|
||||
|
||||
-/* 8086 Vendor sub-types */
|
||||
+/* 8086 vendor PCI sub-types */
|
||||
#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
|
||||
#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
|
||||
#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
|
||||
|
||||
-#define VFIO_REGION_TYPE_GFX (1)
|
||||
+/* 10de vendor PCI sub-types */
|
||||
+/*
|
||||
+ * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space.
|
||||
+ */
|
||||
+#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1)
|
||||
+
|
||||
+/* 1014 vendor PCI sub-types */
|
||||
+/*
|
||||
+ * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU
|
||||
+ * to do TLB invalidation on a GPU.
|
||||
+ */
|
||||
+#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1)
|
||||
+
|
||||
+/* sub-types for VFIO_REGION_TYPE_GFX */
|
||||
#define VFIO_REGION_SUBTYPE_GFX_EDID (1)
|
||||
|
||||
/**
|
||||
@@ -353,24 +377,237 @@ struct vfio_region_gfx_edid {
|
||||
#define VFIO_DEVICE_GFX_LINK_STATE_DOWN 2
|
||||
};
|
||||
|
||||
-#define VFIO_REGION_TYPE_CCW (2)
|
||||
-/* ccw sub-types */
|
||||
+/* sub-types for VFIO_REGION_TYPE_CCW */
|
||||
#define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD (1)
|
||||
+#define VFIO_REGION_SUBTYPE_CCW_SCHIB (2)
|
||||
+#define VFIO_REGION_SUBTYPE_CCW_CRW (3)
|
||||
|
||||
-/*
|
||||
- * 10de vendor sub-type
|
||||
- *
|
||||
- * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space.
|
||||
- */
|
||||
-#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1)
|
||||
+/* sub-types for VFIO_REGION_TYPE_MIGRATION */
|
||||
+#define VFIO_REGION_SUBTYPE_MIGRATION (1)
|
||||
|
||||
/*
|
||||
- * 1014 vendor sub-type
|
||||
+ * The structure vfio_device_migration_info is placed at the 0th offset of
|
||||
+ * the VFIO_REGION_SUBTYPE_MIGRATION region to get and set VFIO device related
|
||||
+ * migration information. Field accesses from this structure are only supported
|
||||
+ * at their native width and alignment. Otherwise, the result is undefined and
|
||||
+ * vendor drivers should return an error.
|
||||
*
|
||||
- * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU
|
||||
- * to do TLB invalidation on a GPU.
|
||||
+ * device_state: (read/write)
|
||||
+ * - The user application writes to this field to inform the vendor driver
|
||||
+ * about the device state to be transitioned to.
|
||||
+ * - The vendor driver should take the necessary actions to change the
|
||||
+ * device state. After successful transition to a given state, the
|
||||
+ * vendor driver should return success on write(device_state, state)
|
||||
+ * system call. If the device state transition fails, the vendor driver
|
||||
+ * should return an appropriate -errno for the fault condition.
|
||||
+ * - On the user application side, if the device state transition fails,
|
||||
+ * that is, if write(device_state, state) returns an error, read
|
||||
+ * device_state again to determine the current state of the device from
|
||||
+ * the vendor driver.
|
||||
+ * - The vendor driver should return previous state of the device unless
|
||||
+ * the vendor driver has encountered an internal error, in which case
|
||||
+ * the vendor driver may report the device_state VFIO_DEVICE_STATE_ERROR.
|
||||
+ * - The user application must use the device reset ioctl to recover the
|
||||
+ * device from VFIO_DEVICE_STATE_ERROR state. If the device is
|
||||
+ * indicated to be in a valid device state by reading device_state, the
|
||||
+ * user application may attempt to transition the device to any valid
|
||||
+ * state reachable from the current state or terminate itself.
|
||||
+ *
|
||||
+ * device_state consists of 3 bits:
|
||||
+ * - If bit 0 is set, it indicates the _RUNNING state. If bit 0 is clear,
|
||||
+ * it indicates the _STOP state. When the device state is changed to
|
||||
+ * _STOP, driver should stop the device before write() returns.
|
||||
+ * - If bit 1 is set, it indicates the _SAVING state, which means that the
|
||||
+ * driver should start gathering device state information that will be
|
||||
+ * provided to the VFIO user application to save the device's state.
|
||||
+ * - If bit 2 is set, it indicates the _RESUMING state, which means that
|
||||
+ * the driver should prepare to resume the device. Data provided through
|
||||
+ * the migration region should be used to resume the device.
|
||||
+ * Bits 3 - 31 are reserved for future use. To preserve them, the user
|
||||
+ * application should perform a read-modify-write operation on this
|
||||
+ * field when modifying the specified bits.
|
||||
+ *
|
||||
+ * +------- _RESUMING
|
||||
+ * |+------ _SAVING
|
||||
+ * ||+----- _RUNNING
|
||||
+ * |||
|
||||
+ * 000b => Device Stopped, not saving or resuming
|
||||
+ * 001b => Device running, which is the default state
|
||||
+ * 010b => Stop the device & save the device state, stop-and-copy state
|
||||
+ * 011b => Device running and save the device state, pre-copy state
|
||||
+ * 100b => Device stopped and the device state is resuming
|
||||
+ * 101b => Invalid state
|
||||
+ * 110b => Error state
|
||||
+ * 111b => Invalid state
|
||||
+ *
|
||||
+ * State transitions:
|
||||
+ *
|
||||
+ * _RESUMING _RUNNING Pre-copy Stop-and-copy _STOP
|
||||
+ * (100b) (001b) (011b) (010b) (000b)
|
||||
+ * 0. Running or default state
|
||||
+ * |
|
||||
+ *
|
||||
+ * 1. Normal Shutdown (optional)
|
||||
+ * |------------------------------------->|
|
||||
+ *
|
||||
+ * 2. Save the state or suspend
|
||||
+ * |------------------------->|---------->|
|
||||
+ *
|
||||
+ * 3. Save the state during live migration
|
||||
+ * |----------->|------------>|---------->|
|
||||
+ *
|
||||
+ * 4. Resuming
|
||||
+ * |<---------|
|
||||
+ *
|
||||
+ * 5. Resumed
|
||||
+ * |--------->|
|
||||
+ *
|
||||
+ * 0. Default state of VFIO device is _RUNNING when the user application starts.
|
||||
+ * 1. During normal shutdown of the user application, the user application may
|
||||
+ * optionally change the VFIO device state from _RUNNING to _STOP. This
|
||||
+ * transition is optional. The vendor driver must support this transition but
|
||||
+ * must not require it.
|
||||
+ * 2. When the user application saves state or suspends the application, the
|
||||
+ * device state transitions from _RUNNING to stop-and-copy and then to _STOP.
|
||||
+ * On state transition from _RUNNING to stop-and-copy, driver must stop the
|
||||
+ * device, save the device state and send it to the application through the
|
||||
+ * migration region. The sequence to be followed for such transition is given
|
||||
+ * below.
|
||||
+ * 3. In live migration of user application, the state transitions from _RUNNING
|
||||
+ * to pre-copy, to stop-and-copy, and to _STOP.
|
||||
+ * On state transition from _RUNNING to pre-copy, the driver should start
|
||||
+ * gathering the device state while the application is still running and send
|
||||
+ * the device state data to application through the migration region.
|
||||
+ * On state transition from pre-copy to stop-and-copy, the driver must stop
|
||||
+ * the device, save the device state and send it to the user application
|
||||
+ * through the migration region.
|
||||
+ * Vendor drivers must support the pre-copy state even for implementations
|
||||
+ * where no data is provided to the user before the stop-and-copy state. The
|
||||
+ * user must not be required to consume all migration data before the device
|
||||
+ * transitions to a new state, including the stop-and-copy state.
|
||||
+ * The sequence to be followed for above two transitions is given below.
|
||||
+ * 4. To start the resuming phase, the device state should be transitioned from
|
||||
+ * the _RUNNING to the _RESUMING state.
|
||||
+ * In the _RESUMING state, the driver should use the device state data
|
||||
+ * received through the migration region to resume the device.
|
||||
+ * 5. After providing saved device data to the driver, the application should
|
||||
+ * change the state from _RESUMING to _RUNNING.
|
||||
+ *
|
||||
+ * reserved:
|
||||
+ * Reads on this field return zero and writes are ignored.
|
||||
+ *
|
||||
+ * pending_bytes: (read only)
|
||||
+ * The number of pending bytes still to be migrated from the vendor driver.
|
||||
+ *
|
||||
+ * data_offset: (read only)
|
||||
+ * The user application should read data_offset field from the migration
|
||||
+ * region. The user application should read the device data from this
|
||||
+ * offset within the migration region during the _SAVING state or write
|
||||
+ * the device data during the _RESUMING state. See below for details of
|
||||
+ * sequence to be followed.
|
||||
+ *
|
||||
+ * data_size: (read/write)
|
||||
+ * The user application should read data_size to get the size in bytes of
|
||||
+ * the data copied in the migration region during the _SAVING state and
|
||||
+ * write the size in bytes of the data copied in the migration region
|
||||
+ * during the _RESUMING state.
|
||||
+ *
|
||||
+ * The format of the migration region is as follows:
|
||||
+ * ------------------------------------------------------------------
|
||||
+ * |vfio_device_migration_info| data section |
|
||||
+ * | | /////////////////////////////// |
|
||||
+ * ------------------------------------------------------------------
|
||||
+ * ^ ^
|
||||
+ * offset 0-trapped part data_offset
|
||||
+ *
|
||||
+ * The structure vfio_device_migration_info is always followed by the data
|
||||
+ * section in the region, so data_offset will always be nonzero. The offset
|
||||
+ * from where the data is copied is decided by the kernel driver. The data
|
||||
+ * section can be trapped, mmapped, or partitioned, depending on how the kernel
|
||||
+ * driver defines the data section. The data section partition can be defined
|
||||
+ * as mapped by the sparse mmap capability. If mmapped, data_offset must be
|
||||
+ * page aligned, whereas initial section which contains the
|
||||
+ * vfio_device_migration_info structure, might not end at the offset, which is
|
||||
+ * page aligned. The user is not required to access through mmap regardless
|
||||
+ * of the capabilities of the region mmap.
|
||||
+ * The vendor driver should determine whether and how to partition the data
|
||||
+ * section. The vendor driver should return data_offset accordingly.
|
||||
+ *
|
||||
+ * The sequence to be followed while in pre-copy state and stop-and-copy state
|
||||
+ * is as follows:
|
||||
+ * a. Read pending_bytes, indicating the start of a new iteration to get device
|
||||
+ * data. Repeated read on pending_bytes at this stage should have no side
|
||||
+ * effects.
|
||||
+ * If pending_bytes == 0, the user application should not iterate to get data
|
||||
+ * for that device.
|
||||
+ * If pending_bytes > 0, perform the following steps.
|
||||
+ * b. Read data_offset, indicating that the vendor driver should make data
|
||||
+ * available through the data section. The vendor driver should return this
|
||||
+ * read operation only after data is available from (region + data_offset)
|
||||
+ * to (region + data_offset + data_size).
|
||||
+ * c. Read data_size, which is the amount of data in bytes available through
|
||||
+ * the migration region.
|
||||
+ * Read on data_offset and data_size should return the offset and size of
|
||||
+ * the current buffer if the user application reads data_offset and
|
||||
+ * data_size more than once here.
|
||||
+ * d. Read data_size bytes of data from (region + data_offset) from the
|
||||
+ * migration region.
|
||||
+ * e. Process the data.
|
||||
+ * f. Read pending_bytes, which indicates that the data from the previous
|
||||
+ * iteration has been read. If pending_bytes > 0, go to step b.
|
||||
+ *
|
||||
+ * The user application can transition from the _SAVING|_RUNNING
|
||||
+ * (pre-copy state) to the _SAVING (stop-and-copy) state regardless of the
|
||||
+ * number of pending bytes. The user application should iterate in _SAVING
|
||||
+ * (stop-and-copy) until pending_bytes is 0.
|
||||
+ *
|
||||
+ * The sequence to be followed while _RESUMING device state is as follows:
|
||||
+ * While data for this device is available, repeat the following steps:
|
||||
+ * a. Read data_offset from where the user application should write data.
|
||||
+ * b. Write migration data starting at the migration region + data_offset for
|
||||
+ * the length determined by data_size from the migration source.
|
||||
+ * c. Write data_size, which indicates to the vendor driver that data is
|
||||
+ * written in the migration region. Vendor driver must return this write
|
||||
+ * operation on consuming data. Vendor driver should apply the
|
||||
+ * user-provided migration region data to the device resume state.
|
||||
+ *
|
||||
+ * If an error occurs during the above sequences, the vendor driver can return
|
||||
+ * an error code for next read() or write() operation, which will terminate the
|
||||
+ * loop. The user application should then take the next necessary action, for
|
||||
+ * example, failing migration or terminating the user application.
|
||||
+ *
|
||||
+ * For the user application, data is opaque. The user application should write
|
||||
+ * data in the same order as the data is received and the data should be of
|
||||
+ * same transaction size at the source.
|
||||
*/
|
||||
-#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1)
|
||||
+
|
||||
+struct vfio_device_migration_info {
|
||||
+ __u32 device_state; /* VFIO device state */
|
||||
+#define VFIO_DEVICE_STATE_STOP (0)
|
||||
+#define VFIO_DEVICE_STATE_RUNNING (1 << 0)
|
||||
+#define VFIO_DEVICE_STATE_SAVING (1 << 1)
|
||||
+#define VFIO_DEVICE_STATE_RESUMING (1 << 2)
|
||||
+#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_RUNNING | \
|
||||
+ VFIO_DEVICE_STATE_SAVING | \
|
||||
+ VFIO_DEVICE_STATE_RESUMING)
|
||||
+
|
||||
+#define VFIO_DEVICE_STATE_VALID(state) \
|
||||
+ (state & VFIO_DEVICE_STATE_RESUMING ? \
|
||||
+ (state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_RESUMING : 1)
|
||||
+
|
||||
+#define VFIO_DEVICE_STATE_IS_ERROR(state) \
|
||||
+ ((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_SAVING | \
|
||||
+ VFIO_DEVICE_STATE_RESUMING))
|
||||
+
|
||||
+#define VFIO_DEVICE_STATE_SET_ERROR(state) \
|
||||
+ ((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_STATE_SAVING | \
|
||||
+ VFIO_DEVICE_STATE_RESUMING)
|
||||
+
|
||||
+ __u32 reserved;
|
||||
+ __u64 pending_bytes;
|
||||
+ __u64 data_offset;
|
||||
+ __u64 data_size;
|
||||
+};
|
||||
|
||||
/*
|
||||
* The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
|
||||
@@ -570,6 +807,7 @@ enum {
|
||||
|
||||
enum {
|
||||
VFIO_CCW_IO_IRQ_INDEX,
|
||||
+ VFIO_CCW_CRW_IRQ_INDEX,
|
||||
VFIO_CCW_NUM_IRQS
|
||||
};
|
||||
|
||||
@@ -700,6 +938,43 @@ struct vfio_device_ioeventfd {
|
||||
|
||||
#define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16)
|
||||
|
||||
+/**
|
||||
+ * VFIO_DEVICE_FEATURE - _IOWR(VFIO_TYPE, VFIO_BASE + 17,
|
||||
+ * struct vfio_device_feature)
|
||||
+ *
|
||||
+ * Get, set, or probe feature data of the device. The feature is selected
|
||||
+ * using the FEATURE_MASK portion of the flags field. Support for a feature
|
||||
+ * can be probed by setting both the FEATURE_MASK and PROBE bits. A probe
|
||||
+ * may optionally include the GET and/or SET bits to determine read vs write
|
||||
+ * access of the feature respectively. Probing a feature will return success
|
||||
+ * if the feature is supported and all of the optionally indicated GET/SET
|
||||
+ * methods are supported. The format of the data portion of the structure is
|
||||
+ * specific to the given feature. The data portion is not required for
|
||||
+ * probing. GET and SET are mutually exclusive, except for use with PROBE.
|
||||
+ *
|
||||
+ * Return 0 on success, -errno on failure.
|
||||
+ */
|
||||
+struct vfio_device_feature {
|
||||
+ __u32 argsz;
|
||||
+ __u32 flags;
|
||||
+#define VFIO_DEVICE_FEATURE_MASK (0xffff) /* 16-bit feature index */
|
||||
+#define VFIO_DEVICE_FEATURE_GET (1 << 16) /* Get feature into data[] */
|
||||
+#define VFIO_DEVICE_FEATURE_SET (1 << 17) /* Set feature from data[] */
|
||||
+#define VFIO_DEVICE_FEATURE_PROBE (1 << 18) /* Probe feature support */
|
||||
+ __u8 data[];
|
||||
+};
|
||||
+
|
||||
+#define VFIO_DEVICE_FEATURE _IO(VFIO_TYPE, VFIO_BASE + 17)
|
||||
+
|
||||
+/*
|
||||
+ * Provide support for setting a PCI VF Token, which is used as a shared
|
||||
+ * secret between PF and VF drivers. This feature may only be set on a
|
||||
+ * PCI SR-IOV PF when SR-IOV is enabled on the PF and there are no existing
|
||||
+ * open VFs. Data provided when setting this feature is a 16-byte array
|
||||
+ * (__u8 b[16]), representing a UUID.
|
||||
+ */
|
||||
+#define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN (0)
|
||||
+
|
||||
/* -------- API for Type1 VFIO IOMMU -------- */
|
||||
|
||||
/**
|
||||
@@ -714,7 +989,54 @@ struct vfio_iommu_type1_info {
|
||||
__u32 argsz;
|
||||
__u32 flags;
|
||||
#define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */
|
||||
- __u64 iova_pgsizes; /* Bitmap of supported page sizes */
|
||||
+#define VFIO_IOMMU_INFO_CAPS (1 << 1) /* Info supports caps */
|
||||
+ __u64 iova_pgsizes; /* Bitmap of supported page sizes */
|
||||
+ __u32 cap_offset; /* Offset within info struct of first cap */
|
||||
+};
|
||||
+
|
||||
+/*
|
||||
+ * The IOVA capability allows to report the valid IOVA range(s)
|
||||
+ * excluding any non-relaxable reserved regions exposed by
|
||||
+ * devices attached to the container. Any DMA map attempt
|
||||
+ * outside the valid iova range will return error.
|
||||
+ *
|
||||
+ * The structures below define version 1 of this capability.
|
||||
+ */
|
||||
+#define VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE 1
|
||||
+
|
||||
+struct vfio_iova_range {
|
||||
+ __u64 start;
|
||||
+ __u64 end;
|
||||
+};
|
||||
+
|
||||
+struct vfio_iommu_type1_info_cap_iova_range {
|
||||
+ struct vfio_info_cap_header header;
|
||||
+ __u32 nr_iovas;
|
||||
+ __u32 reserved;
|
||||
+ struct vfio_iova_range iova_ranges[];
|
||||
+};
|
||||
+
|
||||
+/*
|
||||
+ * The migration capability allows to report supported features for migration.
|
||||
+ *
|
||||
+ * The structures below define version 1 of this capability.
|
||||
+ *
|
||||
+ * The existence of this capability indicates that IOMMU kernel driver supports
|
||||
+ * dirty page logging.
|
||||
+ *
|
||||
+ * pgsize_bitmap: Kernel driver returns bitmap of supported page sizes for dirty
|
||||
+ * page logging.
|
||||
+ * max_dirty_bitmap_size: Kernel driver returns maximum supported dirty bitmap
|
||||
+ * size in bytes that can be used by user applications when getting the dirty
|
||||
+ * bitmap.
|
||||
+ */
|
||||
+#define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION 2
|
||||
+
|
||||
+struct vfio_iommu_type1_info_cap_migration {
|
||||
+ struct vfio_info_cap_header header;
|
||||
+ __u32 flags;
|
||||
+ __u64 pgsize_bitmap;
|
||||
+ __u64 max_dirty_bitmap_size; /* in bytes */
|
||||
};
|
||||
|
||||
#define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
|
||||
@@ -737,6 +1059,12 @@ struct vfio_iommu_type1_dma_map {
|
||||
|
||||
#define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
|
||||
|
||||
+struct vfio_bitmap {
|
||||
+ __u64 pgsize; /* page size for bitmap in bytes */
|
||||
+ __u64 size; /* in bytes */
|
||||
+ __u64 *data; /* one bit per page */
|
||||
+};
|
||||
+
|
||||
/**
|
||||
* VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14,
|
||||
* struct vfio_dma_unmap)
|
||||
@@ -746,12 +1074,23 @@ struct vfio_iommu_type1_dma_map {
|
||||
* field. No guarantee is made to the user that arbitrary unmaps of iova
|
||||
* or size different from those used in the original mapping call will
|
||||
* succeed.
|
||||
+ * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap
|
||||
+ * before unmapping IO virtual addresses. When this flag is set, the user must
|
||||
+ * provide a struct vfio_bitmap in data[]. User must provide zero-allocated
|
||||
+ * memory via vfio_bitmap.data and its size in the vfio_bitmap.size field.
|
||||
+ * A bit in the bitmap represents one page, of user provided page size in
|
||||
+ * vfio_bitmap.pgsize field, consecutively starting from iova offset. Bit set
|
||||
+ * indicates that the page at that offset from iova is dirty. A Bitmap of the
|
||||
+ * pages in the range of unmapped size is returned in the user-provided
|
||||
+ * vfio_bitmap.data.
|
||||
*/
|
||||
struct vfio_iommu_type1_dma_unmap {
|
||||
__u32 argsz;
|
||||
__u32 flags;
|
||||
+#define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0)
|
||||
__u64 iova; /* IO virtual address */
|
||||
__u64 size; /* Size of mapping (bytes) */
|
||||
+ __u8 data[];
|
||||
};
|
||||
|
||||
#define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
|
||||
@@ -763,6 +1102,57 @@ struct vfio_iommu_type1_dma_unmap {
|
||||
#define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15)
|
||||
#define VFIO_IOMMU_DISABLE _IO(VFIO_TYPE, VFIO_BASE + 16)
|
||||
|
||||
+/**
|
||||
+ * VFIO_IOMMU_DIRTY_PAGES - _IOWR(VFIO_TYPE, VFIO_BASE + 17,
|
||||
+ * struct vfio_iommu_type1_dirty_bitmap)
|
||||
+ * IOCTL is used for dirty pages logging.
|
||||
+ * Caller should set flag depending on which operation to perform, details as
|
||||
+ * below:
|
||||
+ *
|
||||
+ * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_START flag set, instructs
|
||||
+ * the IOMMU driver to log pages that are dirtied or potentially dirtied by
|
||||
+ * the device; designed to be used when a migration is in progress. Dirty pages
|
||||
+ * are logged until logging is disabled by user application by calling the IOCTL
|
||||
+ * with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag.
|
||||
+ *
|
||||
+ * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag set, instructs
|
||||
+ * the IOMMU driver to stop logging dirtied pages.
|
||||
+ *
|
||||
+ * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP flag set
|
||||
+ * returns the dirty pages bitmap for IOMMU container for a given IOVA range.
|
||||
+ * The user must specify the IOVA range and the pgsize through the structure
|
||||
+ * vfio_iommu_type1_dirty_bitmap_get in the data[] portion. This interface
|
||||
+ * supports getting a bitmap of the smallest supported pgsize only and can be
|
||||
+ * modified in future to get a bitmap of any specified supported pgsize. The
|
||||
+ * user must provide a zeroed memory area for the bitmap memory and specify its
|
||||
+ * size in bitmap.size. One bit is used to represent one page consecutively
|
||||
+ * starting from iova offset. The user should provide page size in bitmap.pgsize
|
||||
+ * field. A bit set in the bitmap indicates that the page at that offset from
|
||||
+ * iova is dirty. The caller must set argsz to a value including the size of
|
||||
+ * structure vfio_iommu_type1_dirty_bitmap_get, but excluding the size of the
|
||||
+ * actual bitmap. If dirty pages logging is not enabled, an error will be
|
||||
+ * returned.
|
||||
+ *
|
||||
+ * Only one of the flags _START, _STOP and _GET may be specified at a time.
|
||||
+ *
|
||||
+ */
|
||||
+struct vfio_iommu_type1_dirty_bitmap {
|
||||
+ __u32 argsz;
|
||||
+ __u32 flags;
|
||||
+#define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0)
|
||||
+#define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1)
|
||||
+#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2)
|
||||
+ __u8 data[];
|
||||
+};
|
||||
+
|
||||
+struct vfio_iommu_type1_dirty_bitmap_get {
|
||||
+ __u64 iova; /* IO virtual address */
|
||||
+ __u64 size; /* Size of iova range */
|
||||
+ struct vfio_bitmap bitmap;
|
||||
+};
|
||||
+
|
||||
+#define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17)
|
||||
+
|
||||
/* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
|
||||
|
||||
/*
|
||||
--
|
||||
2.27.0
|
||||
|
||||
35
memory-Set-DIRTY_MEMORY_MIGRATION-when-IOMMU-is-enab.patch
Normal file
35
memory-Set-DIRTY_MEMORY_MIGRATION-when-IOMMU-is-enab.patch
Normal file
@ -0,0 +1,35 @@
|
||||
From 0ae8b3e05294fee99870efa9b58e22e16f31caf9 Mon Sep 17 00:00:00 2001
|
||||
From: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Date: Mon, 26 Oct 2020 15:06:20 +0530
|
||||
Subject: [PATCH] memory: Set DIRTY_MEMORY_MIGRATION when IOMMU is enabled
|
||||
|
||||
mr->ram_block is NULL when mr->is_iommu is true, then fr.dirty_log_mask
|
||||
wasn't set correctly due to which memory listener's log_sync doesn't
|
||||
get called.
|
||||
This patch returns log_mask with DIRTY_MEMORY_MIGRATION set when
|
||||
IOMMU is enabled.
|
||||
|
||||
Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Reviewed-by: Yan Zhao <yan.y.zhao@intel.com>
|
||||
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
---
|
||||
memory.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/memory.c b/memory.c
|
||||
index 5d8c9a9234..44713efc66 100644
|
||||
--- a/memory.c
|
||||
+++ b/memory.c
|
||||
@@ -1825,7 +1825,7 @@ bool memory_region_is_ram_device(MemoryRegion *mr)
|
||||
uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr)
|
||||
{
|
||||
uint8_t mask = mr->dirty_log_mask;
|
||||
- if (global_dirty_log && mr->ram_block) {
|
||||
+ if (global_dirty_log && (mr->ram_block || memory_region_is_iommu(mr))) {
|
||||
mask |= (1 << DIRTY_MEMORY_MIGRATION);
|
||||
}
|
||||
return mask;
|
||||
--
|
||||
2.27.0
|
||||
|
||||
201
migration-register_savevm_live-doesn-t-need-dev.patch
Normal file
201
migration-register_savevm_live-doesn-t-need-dev.patch
Normal file
@ -0,0 +1,201 @@
|
||||
From 0f7cde69416f85ec3d3f57769ae38db3d72fda8c Mon Sep 17 00:00:00 2001
|
||||
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||
Date: Thu, 22 Aug 2019 12:54:33 +0100
|
||||
Subject: [PATCH] migration: register_savevm_live doesn't need dev
|
||||
|
||||
Commit 78dd48df3 removed the last caller of register_savevm_live for an
|
||||
instantiable device (rather than a single system wide device);
|
||||
so trim out the parameter.
|
||||
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Message-Id: <20190822115433.12070-1-dgilbert@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
---
|
||||
docs/devel/migration.rst | 3 +--
|
||||
hw/ppc/spapr.c | 2 +-
|
||||
hw/s390x/s390-skeys.c | 2 +-
|
||||
hw/s390x/s390-stattrib.c | 2 +-
|
||||
hw/s390x/tod.c | 2 +-
|
||||
include/migration/register.h | 3 +--
|
||||
migration/block-dirty-bitmap.c | 2 +-
|
||||
migration/block.c | 2 +-
|
||||
migration/ram.c | 2 +-
|
||||
migration/savevm.c | 23 +----------------------
|
||||
net/slirp.c | 2 +-
|
||||
11 files changed, 11 insertions(+), 34 deletions(-)
|
||||
|
||||
diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst
|
||||
index 220059679a..cc6f839fce 100644
|
||||
--- a/docs/devel/migration.rst
|
||||
+++ b/docs/devel/migration.rst
|
||||
@@ -183,8 +183,7 @@ another to load the state back.
|
||||
|
||||
.. code:: c
|
||||
|
||||
- int register_savevm_live(DeviceState *dev,
|
||||
- const char *idstr,
|
||||
+ int register_savevm_live(const char *idstr,
|
||||
int instance_id,
|
||||
int version_id,
|
||||
SaveVMHandlers *ops,
|
||||
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
|
||||
index b0f37c34a4..289967c3de 100644
|
||||
--- a/hw/ppc/spapr.c
|
||||
+++ b/hw/ppc/spapr.c
|
||||
@@ -3069,7 +3069,7 @@ static void spapr_machine_init(MachineState *machine)
|
||||
* interface, this is a legacy from the sPAPREnvironment structure
|
||||
* which predated MachineState but had a similar function */
|
||||
vmstate_register(NULL, 0, &vmstate_spapr, spapr);
|
||||
- register_savevm_live(NULL, "spapr/htab", VMSTATE_INSTANCE_ID_ANY, 1,
|
||||
+ register_savevm_live("spapr/htab", VMSTATE_INSTANCE_ID_ANY, 1,
|
||||
&savevm_htab_handlers, spapr);
|
||||
|
||||
qbus_set_hotplug_handler(sysbus_get_default(), OBJECT(machine),
|
||||
diff --git a/hw/s390x/s390-skeys.c b/hw/s390x/s390-skeys.c
|
||||
index e5bd92c0c7..fb7d57865d 100644
|
||||
--- a/hw/s390x/s390-skeys.c
|
||||
+++ b/hw/s390x/s390-skeys.c
|
||||
@@ -388,7 +388,7 @@ static inline void s390_skeys_set_migration_enabled(Object *obj, bool value,
|
||||
ss->migration_enabled = value;
|
||||
|
||||
if (ss->migration_enabled) {
|
||||
- register_savevm_live(NULL, TYPE_S390_SKEYS, 0, 1,
|
||||
+ register_savevm_live(TYPE_S390_SKEYS, 0, 1,
|
||||
&savevm_s390_storage_keys, ss);
|
||||
} else {
|
||||
unregister_savevm(DEVICE(ss), TYPE_S390_SKEYS, ss);
|
||||
diff --git a/hw/s390x/s390-stattrib.c b/hw/s390x/s390-stattrib.c
|
||||
index 766f2015a4..5ee15d5e82 100644
|
||||
--- a/hw/s390x/s390-stattrib.c
|
||||
+++ b/hw/s390x/s390-stattrib.c
|
||||
@@ -382,7 +382,7 @@ static void s390_stattrib_instance_init(Object *obj)
|
||||
{
|
||||
S390StAttribState *sas = S390_STATTRIB(obj);
|
||||
|
||||
- register_savevm_live(NULL, TYPE_S390_STATTRIB, 0, 0,
|
||||
+ register_savevm_live(TYPE_S390_STATTRIB, 0, 0,
|
||||
&savevm_s390_stattrib_handlers, sas);
|
||||
|
||||
object_property_add_bool(obj, "migration-enabled",
|
||||
diff --git a/hw/s390x/tod.c b/hw/s390x/tod.c
|
||||
index a9fca8eb0b..d6b22bb966 100644
|
||||
--- a/hw/s390x/tod.c
|
||||
+++ b/hw/s390x/tod.c
|
||||
@@ -100,7 +100,7 @@ static void s390_tod_realize(DeviceState *dev, Error **errp)
|
||||
S390TODState *td = S390_TOD(dev);
|
||||
|
||||
/* Legacy migration interface */
|
||||
- register_savevm_live(NULL, "todclock", 0, 1, &savevm_tod, td);
|
||||
+ register_savevm_live("todclock", 0, 1, &savevm_tod, td);
|
||||
}
|
||||
|
||||
static void s390_tod_class_init(ObjectClass *oc, void *data)
|
||||
diff --git a/include/migration/register.h b/include/migration/register.h
|
||||
index 8b2bc5b129..f3ba10b6ef 100644
|
||||
--- a/include/migration/register.h
|
||||
+++ b/include/migration/register.h
|
||||
@@ -68,8 +68,7 @@ typedef struct SaveVMHandlers {
|
||||
int (*resume_prepare)(MigrationState *s, void *opaque);
|
||||
} SaveVMHandlers;
|
||||
|
||||
-int register_savevm_live(DeviceState *dev,
|
||||
- const char *idstr,
|
||||
+int register_savevm_live(const char *idstr,
|
||||
uint32_t instance_id,
|
||||
int version_id,
|
||||
const SaveVMHandlers *ops,
|
||||
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
|
||||
index 4a896a09eb..11e8feb595 100644
|
||||
--- a/migration/block-dirty-bitmap.c
|
||||
+++ b/migration/block-dirty-bitmap.c
|
||||
@@ -733,7 +733,7 @@ void dirty_bitmap_mig_init(void)
|
||||
{
|
||||
QSIMPLEQ_INIT(&dirty_bitmap_mig_state.dbms_list);
|
||||
|
||||
- register_savevm_live(NULL, "dirty-bitmap", 0, 1,
|
||||
+ register_savevm_live("dirty-bitmap", 0, 1,
|
||||
&savevm_dirty_bitmap_handlers,
|
||||
&dirty_bitmap_mig_state);
|
||||
}
|
||||
diff --git a/migration/block.c b/migration/block.c
|
||||
index 91f98ef44a..ec15d1d6b3 100644
|
||||
--- a/migration/block.c
|
||||
+++ b/migration/block.c
|
||||
@@ -1030,6 +1030,6 @@ void blk_mig_init(void)
|
||||
QSIMPLEQ_INIT(&block_mig_state.blk_list);
|
||||
qemu_mutex_init(&block_mig_state.lock);
|
||||
|
||||
- register_savevm_live(NULL, "block", 0, 1, &savevm_block_handlers,
|
||||
+ register_savevm_live("block", 0, 1, &savevm_block_handlers,
|
||||
&block_mig_state);
|
||||
}
|
||||
diff --git a/migration/ram.c b/migration/ram.c
|
||||
index d6657a8093..2077ba5be4 100644
|
||||
--- a/migration/ram.c
|
||||
+++ b/migration/ram.c
|
||||
@@ -5125,5 +5125,5 @@ static SaveVMHandlers savevm_ram_handlers = {
|
||||
void ram_mig_init(void)
|
||||
{
|
||||
qemu_mutex_init(&XBZRLE.lock);
|
||||
- register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
|
||||
+ register_savevm_live("ram", 0, 4, &savevm_ram_handlers, &ram_state);
|
||||
}
|
||||
diff --git a/migration/savevm.c b/migration/savevm.c
|
||||
index f0974380e5..cdb79222a4 100644
|
||||
--- a/migration/savevm.c
|
||||
+++ b/migration/savevm.c
|
||||
@@ -683,8 +683,7 @@ static void savevm_state_handler_insert(SaveStateEntry *nse)
|
||||
of the system, so instance_id should be removed/replaced.
|
||||
Meanwhile pass -1 as instance_id if you do not already have a clearly
|
||||
distinguishing id for all instances of your device class. */
|
||||
-int register_savevm_live(DeviceState *dev,
|
||||
- const char *idstr,
|
||||
+int register_savevm_live(const char *idstr,
|
||||
uint32_t instance_id,
|
||||
int version_id,
|
||||
const SaveVMHandlers *ops,
|
||||
@@ -703,26 +702,6 @@ int register_savevm_live(DeviceState *dev,
|
||||
se->is_ram = 1;
|
||||
}
|
||||
|
||||
- if (dev) {
|
||||
- char *id = qdev_get_dev_path(dev);
|
||||
- if (id) {
|
||||
- if (snprintf(se->idstr, sizeof(se->idstr), "%s/", id) >=
|
||||
- sizeof(se->idstr)) {
|
||||
- error_report("Path too long for VMState (%s)", id);
|
||||
- g_free(id);
|
||||
- g_free(se);
|
||||
-
|
||||
- return -1;
|
||||
- }
|
||||
- g_free(id);
|
||||
-
|
||||
- se->compat = g_new0(CompatEntry, 1);
|
||||
- pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), idstr);
|
||||
- se->compat->instance_id = instance_id == -1 ?
|
||||
- calculate_compat_instance_id(idstr) : instance_id;
|
||||
- instance_id = -1;
|
||||
- }
|
||||
- }
|
||||
pstrcat(se->idstr, sizeof(se->idstr), idstr);
|
||||
|
||||
if (instance_id == VMSTATE_INSTANCE_ID_ANY) {
|
||||
diff --git a/net/slirp.c b/net/slirp.c
|
||||
index b34cb29276..f42f496641 100644
|
||||
--- a/net/slirp.c
|
||||
+++ b/net/slirp.c
|
||||
@@ -576,7 +576,7 @@ static int net_slirp_init(NetClientState *peer, const char *model,
|
||||
* specific version?
|
||||
*/
|
||||
g_assert(slirp_state_version() == 4);
|
||||
- register_savevm_live(NULL, "slirp", 0, slirp_state_version(),
|
||||
+ register_savevm_live("slirp", 0, slirp_state_version(),
|
||||
&savevm_slirp_state, s->slirp);
|
||||
|
||||
s->poll_notifier.notify = net_slirp_poll_notify;
|
||||
--
|
||||
2.27.0
|
||||
|
||||
214
qapi-Add-VFIO-devices-migration-stats-in-Migration-s.patch
Normal file
214
qapi-Add-VFIO-devices-migration-stats-in-Migration-s.patch
Normal file
@ -0,0 +1,214 @@
|
||||
From f97eaa27e2fb6b985f090af9acaa780bb6a2ee5b Mon Sep 17 00:00:00 2001
|
||||
From: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Date: Mon, 26 Oct 2020 15:06:27 +0530
|
||||
Subject: [PATCH] qapi: Add VFIO devices migration stats in Migration stats
|
||||
|
||||
Add the number of bytes transferred to the destination VM by all VFIO
|
||||
devices to the migration statistics.
|
||||
|
||||
Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
---
|
||||
hw/vfio/common.c | 19 +++++++++++++++++++
|
||||
hw/vfio/migration.c | 9 +++++++++
|
||||
include/hw/vfio/vfio-common.h | 3 +++
|
||||
migration/migration.c | 17 +++++++++++++++++
|
||||
monitor/hmp-cmds.c | 6 ++++++
|
||||
qapi/migration.json | 17 +++++++++++++++++
|
||||
6 files changed, 71 insertions(+)
|
||||
|
||||
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||
index 4ce1c10734..a86a4c4506 100644
|
||||
--- a/hw/vfio/common.c
|
||||
+++ b/hw/vfio/common.c
|
||||
@@ -291,6 +291,25 @@ const MemoryRegionOps vfio_region_ops = {
|
||||
* Device state interfaces
|
||||
*/
|
||||
|
||||
+bool vfio_mig_active(void)
|
||||
+{
|
||||
+ VFIOGroup *group;
|
||||
+ VFIODevice *vbasedev;
|
||||
+
|
||||
+ if (QLIST_EMPTY(&vfio_group_list)) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ QLIST_FOREACH(group, &vfio_group_list, next) {
|
||||
+ QLIST_FOREACH(vbasedev, &group->device_list, next) {
|
||||
+ if (vbasedev->migration_blocker) {
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
static bool vfio_devices_all_stopped_and_saving(VFIOContainer *container)
|
||||
{
|
||||
VFIOGroup *group;
|
||||
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
|
||||
index 0bdf6a1820..b77c66557e 100644
|
||||
--- a/hw/vfio/migration.c
|
||||
+++ b/hw/vfio/migration.c
|
||||
@@ -45,6 +45,8 @@
|
||||
#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL)
|
||||
#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL)
|
||||
|
||||
+static int64_t bytes_transferred;
|
||||
+
|
||||
static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
|
||||
off_t off, bool iswrite)
|
||||
{
|
||||
@@ -255,6 +257,7 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size)
|
||||
*size = data_size;
|
||||
}
|
||||
|
||||
+ bytes_transferred += data_size;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -785,6 +788,7 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data)
|
||||
case MIGRATION_STATUS_CANCELLING:
|
||||
case MIGRATION_STATUS_CANCELLED:
|
||||
case MIGRATION_STATUS_FAILED:
|
||||
+ bytes_transferred = 0;
|
||||
ret = vfio_migration_set_state(vbasedev,
|
||||
~(VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RESUMING),
|
||||
VFIO_DEVICE_STATE_RUNNING);
|
||||
@@ -866,6 +870,11 @@ err:
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
+int64_t vfio_mig_bytes_transferred(void)
|
||||
+{
|
||||
+ return bytes_transferred;
|
||||
+}
|
||||
+
|
||||
int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
|
||||
{
|
||||
VFIOContainer *container = vbasedev->group->container;
|
||||
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||
index 8fd0212264..048731e81f 100644
|
||||
--- a/include/hw/vfio/vfio-common.h
|
||||
+++ b/include/hw/vfio/vfio-common.h
|
||||
@@ -203,6 +203,9 @@ extern const MemoryRegionOps vfio_region_ops;
|
||||
typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
|
||||
extern VFIOGroupList vfio_group_list;
|
||||
|
||||
+bool vfio_mig_active(void);
|
||||
+int64_t vfio_mig_bytes_transferred(void);
|
||||
+
|
||||
#ifdef CONFIG_LINUX
|
||||
int vfio_get_region_info(VFIODevice *vbasedev, int index,
|
||||
struct vfio_region_info **info);
|
||||
diff --git a/migration/migration.c b/migration/migration.c
|
||||
index b0b9430822..9faf5f63a6 100644
|
||||
--- a/migration/migration.c
|
||||
+++ b/migration/migration.c
|
||||
@@ -49,6 +49,10 @@
|
||||
#include "monitor/monitor.h"
|
||||
#include "net/announce.h"
|
||||
|
||||
+#ifdef CONFIG_VFIO
|
||||
+#include "hw/vfio/vfio-common.h"
|
||||
+#endif
|
||||
+
|
||||
#define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */
|
||||
|
||||
/* Amount of time to allocate to each "chunk" of bandwidth-throttled
|
||||
@@ -908,6 +912,17 @@ static void populate_disk_info(MigrationInfo *info)
|
||||
}
|
||||
}
|
||||
|
||||
+static void populate_vfio_info(MigrationInfo *info)
|
||||
+{
|
||||
+#ifdef CONFIG_VFIO
|
||||
+ if (vfio_mig_active()) {
|
||||
+ info->has_vfio = true;
|
||||
+ info->vfio = g_malloc0(sizeof(*info->vfio));
|
||||
+ info->vfio->transferred = vfio_mig_bytes_transferred();
|
||||
+ }
|
||||
+#endif
|
||||
+}
|
||||
+
|
||||
static void fill_source_migration_info(MigrationInfo *info)
|
||||
{
|
||||
MigrationState *s = migrate_get_current();
|
||||
@@ -941,6 +956,7 @@ static void fill_source_migration_info(MigrationInfo *info)
|
||||
|
||||
populate_ram_info(info, s);
|
||||
populate_disk_info(info);
|
||||
+ populate_vfio_info(info);
|
||||
break;
|
||||
case MIGRATION_STATUS_COLO:
|
||||
info->has_status = true;
|
||||
@@ -956,6 +972,7 @@ static void fill_source_migration_info(MigrationInfo *info)
|
||||
info->setup_time = s->setup_time;
|
||||
|
||||
populate_ram_info(info, s);
|
||||
+ populate_vfio_info(info);
|
||||
break;
|
||||
case MIGRATION_STATUS_FAILED:
|
||||
info->has_status = true;
|
||||
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
|
||||
index e5a7a88ba2..cecaae0a47 100644
|
||||
--- a/monitor/hmp-cmds.c
|
||||
+++ b/monitor/hmp-cmds.c
|
||||
@@ -370,6 +370,12 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
|
||||
}
|
||||
monitor_printf(mon, "]\n");
|
||||
}
|
||||
+
|
||||
+ if (info->has_vfio) {
|
||||
+ monitor_printf(mon, "vfio device transferred: %" PRIu64 " kbytes\n",
|
||||
+ info->vfio->transferred >> 10);
|
||||
+ }
|
||||
+
|
||||
qapi_free_MigrationInfo(info);
|
||||
qapi_free_MigrationCapabilityStatusList(caps);
|
||||
}
|
||||
diff --git a/qapi/migration.json b/qapi/migration.json
|
||||
index 587ef65872..1f0eb19ac6 100644
|
||||
--- a/qapi/migration.json
|
||||
+++ b/qapi/migration.json
|
||||
@@ -141,6 +141,18 @@
|
||||
'active', 'postcopy-active', 'postcopy-paused',
|
||||
'postcopy-recover', 'completed', 'failed', 'colo',
|
||||
'pre-switchover', 'device' ] }
|
||||
+##
|
||||
+# @VfioStats:
|
||||
+#
|
||||
+# Detailed VFIO devices migration statistics
|
||||
+#
|
||||
+# @transferred: amount of bytes transferred to the target VM by VFIO devices
|
||||
+#
|
||||
+# Since: 5.2
|
||||
+#
|
||||
+##
|
||||
+{ 'struct': 'VfioStats',
|
||||
+ 'data': {'transferred': 'int' } }
|
||||
|
||||
##
|
||||
# @MigrationInfo:
|
||||
@@ -202,11 +214,16 @@
|
||||
#
|
||||
# @socket-address: Only used for tcp, to know what the real port is (Since 4.0)
|
||||
#
|
||||
+# @vfio: @VfioStats containing detailed VFIO devices migration statistics,
|
||||
+# only returned if VFIO device is present, migration is supported by all
|
||||
+# VFIO devices and status is 'active' or 'completed' (since 5.2)
|
||||
+#
|
||||
# Since: 0.14.0
|
||||
##
|
||||
{ 'struct': 'MigrationInfo',
|
||||
'data': {'*status': 'MigrationStatus', '*ram': 'MigrationStats',
|
||||
'*disk': 'MigrationStats',
|
||||
+ '*vfio': 'VfioStats',
|
||||
'*xbzrle-cache': 'XBZRLECacheStats',
|
||||
'*total-time': 'int',
|
||||
'*expected-downtime': 'int',
|
||||
--
|
||||
2.27.0
|
||||
|
||||
48
qemu.spec
48
qemu.spec
@ -1,6 +1,6 @@
|
||||
Name: qemu
|
||||
Version: 4.1.0
|
||||
Release: 72
|
||||
Release: 73
|
||||
Epoch: 2
|
||||
Summary: QEMU is a generic and open source machine emulator and virtualizer
|
||||
License: GPLv2 and BSD and MIT and CC-BY-SA-4.0
|
||||
@ -476,6 +476,28 @@ Patch0463: virtio-input-fix-memory-leak-on-unrealize.patch
|
||||
Patch0464: target-arm-only-set-ID_PFR1_EL1.GIC-for-AArch32-gues.patch
|
||||
Patch0465: target-arm-clear-EL2-and-EL3-only-when-kvm-is-not-en.patch
|
||||
Patch0466: target-arm-Update-the-ID-registers-of-Kunpeng-920.patch
|
||||
Patch0467: hw-net-fix-vmxnet3-live-migration.patch
|
||||
Patch0468: include-Make-headers-more-self-contained.patch
|
||||
Patch0469: migration-register_savevm_live-doesn-t-need-dev.patch
|
||||
Patch0470: vmstate-add-qom-interface-to-get-id.patch
|
||||
Patch0471: linux-headers-Update-against-Add-migration-support-f.patch
|
||||
Patch0472: vfio-Add-function-to-unmap-VFIO-region.patch
|
||||
Patch0473: vfio-Add-vfio_get_object-callback-to-VFIODeviceOps.patch
|
||||
Patch0474: vfio-Add-save-and-load-functions-for-VFIO-PCI-device.patch
|
||||
Patch0475: vfio-Add-migration-region-initialization-and-finaliz.patch
|
||||
Patch0476: vfio-Add-VM-state-change-handler-to-know-state-of-VM.patch
|
||||
Patch0477: vfio-Add-migration-state-change-notifier.patch
|
||||
Patch0478: vfio-Register-SaveVMHandlers-for-VFIO-device.patch
|
||||
Patch0479: vfio-Add-save-state-functions-to-SaveVMHandlers.patch
|
||||
Patch0480: vfio-Add-load-state-functions-to-SaveVMHandlers.patch
|
||||
Patch0481: memory-Set-DIRTY_MEMORY_MIGRATION-when-IOMMU-is-enab.patch
|
||||
Patch0482: vfio-Get-migration-capability-flags-for-container.patch
|
||||
Patch0483: vfio-Add-function-to-start-and-stop-dirty-pages-trac.patch
|
||||
Patch0484: vfio-Add-vfio_listener_log_sync-to-mark-dirty-pages.patch
|
||||
Patch0485: vfio-Dirty-page-tracking-when-vIOMMU-is-enabled.patch
|
||||
Patch0486: vfio-Add-ioctl-to-get-dirty-pages-bitmap-during-dma-.patch
|
||||
Patch0487: vfio-Make-vfio-pci-device-migration-capable.patch
|
||||
Patch0488: qapi-Add-VFIO-devices-migration-stats-in-Migration-s.patch
|
||||
|
||||
BuildRequires: flex
|
||||
BuildRequires: gcc
|
||||
@ -870,6 +892,30 @@ getent passwd qemu >/dev/null || \
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Thu Jul 29 2021 imxcc <xingchaochao@huawei.com>
|
||||
- hw/net: fix vmxnet3 live migration
|
||||
- include: Make headers more self-contained
|
||||
- migration: register_savevm_live doesn't need dev
|
||||
- vmstate: add qom interface to get id
|
||||
- linux headers: Update against "Add migration support for VFIO devices"
|
||||
- vfio: Add function to unmap VFIO region
|
||||
- vfio: Add vfio_get_object callback to VFIODeviceOps
|
||||
- vfio: Add save and load functions for VFIO PCI devices
|
||||
- vfio: Add migration region initialization and finalize function
|
||||
- vfio: Add VM state change handler to know state of VM
|
||||
- vfio: Add migration state change notifier
|
||||
- vfio: Register SaveVMHandlers for VFIO device
|
||||
- vfio: Add save state functions to SaveVMHandlers
|
||||
- vfio: Add load state functions to SaveVMHandlers
|
||||
- memory: Set DIRTY_MEMORY_MIGRATION when IOMMU is enabled
|
||||
- vfio: Get migration capability flags for container
|
||||
- vfio: Add function to start and stop dirty pages tracking
|
||||
- vfio: Add vfio_listener_log_sync to mark dirty pages
|
||||
- vfio: Dirty page tracking when vIOMMU is enabled
|
||||
- vfio: Add ioctl to get dirty pages bitmap during dma unmap
|
||||
- vfio: Make vfio-pci device migration capable
|
||||
- qapi: Add VFIO devices migration stats in Migration stats
|
||||
|
||||
* Wed Jul 28 2021 imxcc <xingchaochao@huawei.com>
|
||||
- object: return self in object_ref()
|
||||
- file-posix: Fix leaked fd in raw_open_common() error path
|
||||
|
||||
258
vfio-Add-VM-state-change-handler-to-know-state-of-VM.patch
Normal file
258
vfio-Add-VM-state-change-handler-to-know-state-of-VM.patch
Normal file
@ -0,0 +1,258 @@
|
||||
From 3a875293ae00266e1c82a5c382066efc4acc64ce Mon Sep 17 00:00:00 2001
|
||||
From: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Date: Mon, 26 Oct 2020 15:06:15 +0530
|
||||
Subject: [PATCH] vfio: Add VM state change handler to know state of VM
|
||||
|
||||
The VM state change handler is called when the VM's state changes. The
|
||||
VFIO device state is updated based on the new VM state.
|
||||
Add read/write helper functions for the migration region.
|
||||
Add a function to set device_state.
|
||||
|
||||
Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Reviewed-by: Neo Jia <cjia@nvidia.com>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
|
||||
[aw: lx -> HWADDR_PRIx, remove redundant parens]
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
Signed-off-by: Shenming Lu <lushenming@huawei.com>
|
||||
---
|
||||
hw/vfio/migration.c | 160 ++++++++++++++++++++++++++++++++++
|
||||
hw/vfio/trace-events | 2 +
|
||||
include/hw/vfio/vfio-common.h | 4 +
|
||||
3 files changed, 166 insertions(+)
|
||||
|
||||
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
|
||||
index fd7faf423c..ca82c78536 100644
|
||||
--- a/hw/vfio/migration.c
|
||||
+++ b/hw/vfio/migration.c
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "qemu/osdep.h"
|
||||
#include <linux/vfio.h>
|
||||
|
||||
+#include "sysemu/sysemu.h"
|
||||
#include "hw/vfio/vfio-common.h"
|
||||
#include "cpu.h"
|
||||
#include "migration/migration.h"
|
||||
@@ -22,6 +23,157 @@
|
||||
#include "exec/ram_addr.h"
|
||||
#include "pci.h"
|
||||
#include "trace.h"
|
||||
+#include "hw/hw.h"
|
||||
+
|
||||
+static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
|
||||
+ off_t off, bool iswrite)
|
||||
+{
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = iswrite ? pwrite(vbasedev->fd, val, count, off) :
|
||||
+ pread(vbasedev->fd, val, count, off);
|
||||
+ if (ret < count) {
|
||||
+ error_report("vfio_mig_%s %d byte %s: failed at offset 0x%"
|
||||
+ HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count,
|
||||
+ vbasedev->name, off, strerror(errno));
|
||||
+ return (ret < 0) ? ret : -EINVAL;
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count,
|
||||
+ off_t off, bool iswrite)
|
||||
+{
|
||||
+ int ret, done = 0;
|
||||
+ __u8 *tbuf = buf;
|
||||
+
|
||||
+ while (count) {
|
||||
+ int bytes = 0;
|
||||
+
|
||||
+ if (count >= 8 && !(off % 8)) {
|
||||
+ bytes = 8;
|
||||
+ } else if (count >= 4 && !(off % 4)) {
|
||||
+ bytes = 4;
|
||||
+ } else if (count >= 2 && !(off % 2)) {
|
||||
+ bytes = 2;
|
||||
+ } else {
|
||||
+ bytes = 1;
|
||||
+ }
|
||||
+
|
||||
+ ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite);
|
||||
+ if (ret) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ count -= bytes;
|
||||
+ done += bytes;
|
||||
+ off += bytes;
|
||||
+ tbuf += bytes;
|
||||
+ }
|
||||
+ return done;
|
||||
+}
|
||||
+
|
||||
+#define vfio_mig_read(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, false)
|
||||
+#define vfio_mig_write(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, true)
|
||||
+
|
||||
+#define VFIO_MIG_STRUCT_OFFSET(f) \
|
||||
+ offsetof(struct vfio_device_migration_info, f)
|
||||
+/*
|
||||
+ * Change the device_state register for device @vbasedev. Bits set in @mask
|
||||
+ * are preserved, bits set in @value are set, and bits not set in either @mask
|
||||
+ * or @value are cleared in device_state. If the register cannot be accessed,
|
||||
+ * the resulting state would be invalid, or the device enters an error state,
|
||||
+ * an error is returned.
|
||||
+ */
|
||||
+
|
||||
+static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
|
||||
+ uint32_t value)
|
||||
+{
|
||||
+ VFIOMigration *migration = vbasedev->migration;
|
||||
+ VFIORegion *region = &migration->region;
|
||||
+ off_t dev_state_off = region->fd_offset +
|
||||
+ VFIO_MIG_STRUCT_OFFSET(device_state);
|
||||
+ uint32_t device_state;
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
|
||||
+ dev_state_off);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ device_state = (device_state & mask) | value;
|
||||
+
|
||||
+ if (!VFIO_DEVICE_STATE_VALID(device_state)) {
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state),
|
||||
+ dev_state_off);
|
||||
+ if (ret < 0) {
|
||||
+ int rret;
|
||||
+
|
||||
+ rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
|
||||
+ dev_state_off);
|
||||
+
|
||||
+ if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) {
|
||||
+ hw_error("%s: Device in error state 0x%x", vbasedev->name,
|
||||
+ device_state);
|
||||
+ return rret ? rret : -EIO;
|
||||
+ }
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ migration->device_state = device_state;
|
||||
+ trace_vfio_migration_set_state(vbasedev->name, device_state);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static void vfio_vmstate_change(void *opaque, int running, RunState state)
|
||||
+{
|
||||
+ VFIODevice *vbasedev = opaque;
|
||||
+ VFIOMigration *migration = vbasedev->migration;
|
||||
+ uint32_t value, mask;
|
||||
+ int ret;
|
||||
+
|
||||
+ if (vbasedev->migration->vm_running == running) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ if (running) {
|
||||
+ /*
|
||||
+ * Here device state can have one of _SAVING, _RESUMING or _STOP bit.
|
||||
+ * Transition from _SAVING to _RUNNING can happen if there is migration
|
||||
+ * failure, in that case clear _SAVING bit.
|
||||
+ * Transition from _RESUMING to _RUNNING occurs during resuming
|
||||
+ * phase, in that case clear _RESUMING bit.
|
||||
+ * In both the above cases, set _RUNNING bit.
|
||||
+ */
|
||||
+ mask = ~VFIO_DEVICE_STATE_MASK;
|
||||
+ value = VFIO_DEVICE_STATE_RUNNING;
|
||||
+ } else {
|
||||
+ /*
|
||||
+ * Here device state could be either _RUNNING or _SAVING|_RUNNING. Reset
|
||||
+ * _RUNNING bit
|
||||
+ */
|
||||
+ mask = ~VFIO_DEVICE_STATE_RUNNING;
|
||||
+ value = 0;
|
||||
+ }
|
||||
+
|
||||
+ ret = vfio_migration_set_state(vbasedev, mask, value);
|
||||
+ if (ret) {
|
||||
+ /*
|
||||
+ * Migration should be aborted in this case, but vm_state_notify()
|
||||
+ * currently does not support reporting failures.
|
||||
+ */
|
||||
+ error_report("%s: Failed to set device state 0x%x", vbasedev->name,
|
||||
+ (migration->device_state & mask) | value);
|
||||
+ qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
|
||||
+ }
|
||||
+ vbasedev->migration->vm_running = running;
|
||||
+ trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),
|
||||
+ (migration->device_state & mask) | value);
|
||||
+}
|
||||
|
||||
static void vfio_migration_exit(VFIODevice *vbasedev)
|
||||
{
|
||||
@@ -38,6 +190,7 @@ static int vfio_migration_init(VFIODevice *vbasedev,
|
||||
{
|
||||
int ret;
|
||||
Object *obj;
|
||||
+ VFIOMigration *migration;
|
||||
|
||||
if (!vbasedev->ops->vfio_get_object) {
|
||||
return -EINVAL;
|
||||
@@ -64,6 +217,10 @@ static int vfio_migration_init(VFIODevice *vbasedev,
|
||||
ret = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
+
|
||||
+ migration = vbasedev->migration;
|
||||
+ migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change,
|
||||
+ vbasedev);
|
||||
return 0;
|
||||
|
||||
err:
|
||||
@@ -111,6 +268,9 @@ add_blocker:
|
||||
void vfio_migration_finalize(VFIODevice *vbasedev)
|
||||
{
|
||||
if (vbasedev->migration) {
|
||||
+ VFIOMigration *migration = vbasedev->migration;
|
||||
+
|
||||
+ qemu_del_vm_change_state_handler(migration->vm_state);
|
||||
vfio_migration_exit(vbasedev);
|
||||
}
|
||||
|
||||
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
|
||||
index fd034ac536..1626862315 100644
|
||||
--- a/hw/vfio/trace-events
|
||||
+++ b/hw/vfio/trace-events
|
||||
@@ -146,3 +146,5 @@ vfio_display_edid_write_error(void) ""
|
||||
|
||||
# migration.c
|
||||
vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d"
|
||||
+vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d"
|
||||
+vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d"
|
||||
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||
index e0482c2bac..533d6737ac 100644
|
||||
--- a/include/hw/vfio/vfio-common.h
|
||||
+++ b/include/hw/vfio/vfio-common.h
|
||||
@@ -29,6 +29,7 @@
|
||||
#ifdef CONFIG_LINUX
|
||||
#include <linux/vfio.h>
|
||||
#endif
|
||||
+#include "sysemu/sysemu.h"
|
||||
|
||||
#define VFIO_MSG_PREFIX "vfio %s: "
|
||||
|
||||
@@ -58,7 +59,10 @@ typedef struct VFIORegion {
|
||||
} VFIORegion;
|
||||
|
||||
typedef struct VFIOMigration {
|
||||
+ VMChangeStateEntry *vm_state;
|
||||
VFIORegion region;
|
||||
+ uint32_t device_state;
|
||||
+ int vm_running;
|
||||
} VFIOMigration;
|
||||
|
||||
typedef struct VFIOAddressSpace {
|
||||
--
|
||||
2.27.0
|
||||
|
||||
83
vfio-Add-function-to-start-and-stop-dirty-pages-trac.patch
Normal file
83
vfio-Add-function-to-start-and-stop-dirty-pages-trac.patch
Normal file
@ -0,0 +1,83 @@
|
||||
From 4363ea5cded9c6d2838a9564b067f583a6ef077f Mon Sep 17 00:00:00 2001
|
||||
From: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Date: Mon, 26 Oct 2020 15:06:22 +0530
|
||||
Subject: [PATCH] vfio: Add function to start and stop dirty pages tracking
|
||||
|
||||
Call VFIO_IOMMU_DIRTY_PAGES ioctl to start and stop dirty pages tracking
|
||||
for VFIO devices.
|
||||
|
||||
Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
---
|
||||
hw/vfio/migration.c | 36 ++++++++++++++++++++++++++++++++++++
|
||||
1 file changed, 36 insertions(+)
|
||||
|
||||
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
|
||||
index 0d2bd9e5cd..0bdf6a1820 100644
|
||||
--- a/hw/vfio/migration.c
|
||||
+++ b/hw/vfio/migration.c
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "qemu/main-loop.h"
|
||||
#include "qemu/cutils.h"
|
||||
#include <linux/vfio.h>
|
||||
+#include <sys/ioctl.h>
|
||||
|
||||
#include "sysemu/sysemu.h"
|
||||
#include "hw/vfio/vfio-common.h"
|
||||
@@ -391,10 +392,40 @@ static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
|
||||
return qemu_file_get_error(f);
|
||||
}
|
||||
|
||||
+static int vfio_set_dirty_page_tracking(VFIODevice *vbasedev, bool start)
|
||||
+{
|
||||
+ int ret;
|
||||
+ VFIOMigration *migration = vbasedev->migration;
|
||||
+ VFIOContainer *container = vbasedev->group->container;
|
||||
+ struct vfio_iommu_type1_dirty_bitmap dirty = {
|
||||
+ .argsz = sizeof(dirty),
|
||||
+ };
|
||||
+
|
||||
+ if (start) {
|
||||
+ if (migration->device_state & VFIO_DEVICE_STATE_SAVING) {
|
||||
+ dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
|
||||
+ } else {
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+ } else {
|
||||
+ dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP;
|
||||
+ }
|
||||
+
|
||||
+ ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty);
|
||||
+ if (ret) {
|
||||
+ error_report("Failed to set dirty tracking flag 0x%x errno: %d",
|
||||
+ dirty.flags, errno);
|
||||
+ return -errno;
|
||||
+ }
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
static void vfio_migration_cleanup(VFIODevice *vbasedev)
|
||||
{
|
||||
VFIOMigration *migration = vbasedev->migration;
|
||||
|
||||
+ vfio_set_dirty_page_tracking(vbasedev, false);
|
||||
+
|
||||
if (migration->region.mmaps) {
|
||||
vfio_region_unmap(&migration->region);
|
||||
}
|
||||
@@ -435,6 +466,11 @@ static int vfio_save_setup(QEMUFile *f, void *opaque)
|
||||
return ret;
|
||||
}
|
||||
|
||||
+ ret = vfio_set_dirty_page_tracking(vbasedev, true);
|
||||
+ if (ret) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
|
||||
|
||||
ret = qemu_file_get_error(f);
|
||||
--
|
||||
2.27.0
|
||||
|
||||
103
vfio-Add-function-to-unmap-VFIO-region.patch
Normal file
103
vfio-Add-function-to-unmap-VFIO-region.patch
Normal file
@ -0,0 +1,103 @@
|
||||
From 68cc2be61588d14de2313342ee87eb0bb2b990e0 Mon Sep 17 00:00:00 2001
|
||||
From: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Date: Mon, 26 Oct 2020 15:06:11 +0530
|
||||
Subject: [PATCH] vfio: Add function to unmap VFIO region
|
||||
|
||||
This function will be used for migration region.
|
||||
Migration region is mmaped when migration starts and will be unmapped when
|
||||
migration is complete.
|
||||
|
||||
Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Reviewed-by: Neo Jia <cjia@nvidia.com>
|
||||
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
---
|
||||
hw/vfio/common.c | 32 ++++++++++++++++++++++++++++----
|
||||
hw/vfio/trace-events | 1 +
|
||||
include/hw/vfio/vfio-common.h | 1 +
|
||||
3 files changed, 30 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||
index a859298fda..4c32b1bb99 100644
|
||||
--- a/hw/vfio/common.c
|
||||
+++ b/hw/vfio/common.c
|
||||
@@ -906,6 +906,18 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static void vfio_subregion_unmap(VFIORegion *region, int index)
|
||||
+{
|
||||
+ trace_vfio_region_unmap(memory_region_name(&region->mmaps[index].mem),
|
||||
+ region->mmaps[index].offset,
|
||||
+ region->mmaps[index].offset +
|
||||
+ region->mmaps[index].size - 1);
|
||||
+ memory_region_del_subregion(region->mem, &region->mmaps[index].mem);
|
||||
+ munmap(region->mmaps[index].mmap, region->mmaps[index].size);
|
||||
+ object_unparent(OBJECT(&region->mmaps[index].mem));
|
||||
+ region->mmaps[index].mmap = NULL;
|
||||
+}
|
||||
+
|
||||
int vfio_region_mmap(VFIORegion *region)
|
||||
{
|
||||
int i, prot = 0;
|
||||
@@ -936,10 +948,7 @@ int vfio_region_mmap(VFIORegion *region)
|
||||
region->mmaps[i].mmap = NULL;
|
||||
|
||||
for (i--; i >= 0; i--) {
|
||||
- memory_region_del_subregion(region->mem, &region->mmaps[i].mem);
|
||||
- munmap(region->mmaps[i].mmap, region->mmaps[i].size);
|
||||
- object_unparent(OBJECT(&region->mmaps[i].mem));
|
||||
- region->mmaps[i].mmap = NULL;
|
||||
+ vfio_subregion_unmap(region, i);
|
||||
}
|
||||
|
||||
return ret;
|
||||
@@ -964,6 +973,21 @@ int vfio_region_mmap(VFIORegion *region)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+void vfio_region_unmap(VFIORegion *region)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ if (!region->mem) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ for (i = 0; i < region->nr_mmaps; i++) {
|
||||
+ if (region->mmaps[i].mmap) {
|
||||
+ vfio_subregion_unmap(region, i);
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
void vfio_region_exit(VFIORegion *region)
|
||||
{
|
||||
int i;
|
||||
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
|
||||
index b1ef55a33f..8cdc27946c 100644
|
||||
--- a/hw/vfio/trace-events
|
||||
+++ b/hw/vfio/trace-events
|
||||
@@ -111,6 +111,7 @@ vfio_region_mmap(const char *name, unsigned long offset, unsigned long end) "Reg
|
||||
vfio_region_exit(const char *name, int index) "Device %s, region %d"
|
||||
vfio_region_finalize(const char *name, int index) "Device %s, region %d"
|
||||
vfio_region_mmaps_set_enabled(const char *name, bool enabled) "Region %s mmaps enabled: %d"
|
||||
+vfio_region_unmap(const char *name, unsigned long offset, unsigned long end) "Region %s unmap [0x%lx - 0x%lx]"
|
||||
vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d sparse mmap entries"
|
||||
vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]"
|
||||
vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%0x8"
|
||||
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||
index 9107bd41c0..93493891ba 100644
|
||||
--- a/include/hw/vfio/vfio-common.h
|
||||
+++ b/include/hw/vfio/vfio-common.h
|
||||
@@ -171,6 +171,7 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
|
||||
int index, const char *name);
|
||||
int vfio_region_mmap(VFIORegion *region);
|
||||
void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled);
|
||||
+void vfio_region_unmap(VFIORegion *region);
|
||||
void vfio_region_exit(VFIORegion *region);
|
||||
void vfio_region_finalize(VFIORegion *region);
|
||||
void vfio_reset_handler(void *opaque);
|
||||
--
|
||||
2.27.0
|
||||
|
||||
162
vfio-Add-ioctl-to-get-dirty-pages-bitmap-during-dma-.patch
Normal file
162
vfio-Add-ioctl-to-get-dirty-pages-bitmap-during-dma-.patch
Normal file
@ -0,0 +1,162 @@
|
||||
From 1333031bd3b488ed4904a61fd292cd5aa93f8c5b Mon Sep 17 00:00:00 2001
|
||||
From: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Date: Mon, 26 Oct 2020 15:06:25 +0530
|
||||
Subject: [PATCH] vfio: Add ioctl to get dirty pages bitmap during dma unmap
|
||||
|
||||
With vIOMMU, IO virtual address range can get unmapped while in pre-copy
|
||||
phase of migration. In that case, unmap ioctl should return pages pinned
|
||||
in that range and QEMU should find its corresponding guest physical
|
||||
addresses and report those dirty.
|
||||
|
||||
Suggested-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Reviewed-by: Neo Jia <cjia@nvidia.com>
|
||||
[aw: fix error_report types, fix cpu_physical_memory_set_dirty_lebitmap() cast]
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
---
|
||||
hw/vfio/common.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++--
|
||||
1 file changed, 93 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||
index 8773b998ac..4ce1c10734 100644
|
||||
--- a/hw/vfio/common.c
|
||||
+++ b/hw/vfio/common.c
|
||||
@@ -320,11 +320,95 @@ static bool vfio_devices_all_stopped_and_saving(VFIOContainer *container)
|
||||
return true;
|
||||
}
|
||||
|
||||
+static bool vfio_devices_all_running_and_saving(VFIOContainer *container)
|
||||
+{
|
||||
+ VFIOGroup *group;
|
||||
+ VFIODevice *vbasedev;
|
||||
+ MigrationState *ms = migrate_get_current();
|
||||
+
|
||||
+ if (!migration_is_setup_or_active(ms->state)) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ QLIST_FOREACH(group, &container->group_list, container_next) {
|
||||
+ QLIST_FOREACH(vbasedev, &group->device_list, next) {
|
||||
+ VFIOMigration *migration = vbasedev->migration;
|
||||
+
|
||||
+ if (!migration) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ if ((migration->device_state & VFIO_DEVICE_STATE_SAVING) &&
|
||||
+ (migration->device_state & VFIO_DEVICE_STATE_RUNNING)) {
|
||||
+ continue;
|
||||
+ } else {
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static int vfio_dma_unmap_bitmap(VFIOContainer *container,
|
||||
+ hwaddr iova, ram_addr_t size,
|
||||
+ IOMMUTLBEntry *iotlb)
|
||||
+{
|
||||
+ struct vfio_iommu_type1_dma_unmap *unmap;
|
||||
+ struct vfio_bitmap *bitmap;
|
||||
+ uint64_t pages = TARGET_PAGE_ALIGN(size) >> TARGET_PAGE_BITS;
|
||||
+ int ret;
|
||||
+
|
||||
+ unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap));
|
||||
+
|
||||
+ unmap->argsz = sizeof(*unmap) + sizeof(*bitmap);
|
||||
+ unmap->iova = iova;
|
||||
+ unmap->size = size;
|
||||
+ unmap->flags |= VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP;
|
||||
+ bitmap = (struct vfio_bitmap *)&unmap->data;
|
||||
+
|
||||
+ /*
|
||||
+ * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of
|
||||
+ * TARGET_PAGE_SIZE to mark those dirty. Hence set bitmap_pgsize to
|
||||
+ * TARGET_PAGE_SIZE.
|
||||
+ */
|
||||
+
|
||||
+ bitmap->pgsize = TARGET_PAGE_SIZE;
|
||||
+ bitmap->size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
|
||||
+ BITS_PER_BYTE;
|
||||
+
|
||||
+ if (bitmap->size > container->max_dirty_bitmap_size) {
|
||||
+ error_report("UNMAP: Size of bitmap too big 0x%"PRIx64,
|
||||
+ (uint64_t)bitmap->size);
|
||||
+ ret = -E2BIG;
|
||||
+ goto unmap_exit;
|
||||
+ }
|
||||
+
|
||||
+ bitmap->data = g_try_malloc0(bitmap->size);
|
||||
+ if (!bitmap->data) {
|
||||
+ ret = -ENOMEM;
|
||||
+ goto unmap_exit;
|
||||
+ }
|
||||
+
|
||||
+ ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
|
||||
+ if (!ret) {
|
||||
+ cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data,
|
||||
+ iotlb->translated_addr, pages);
|
||||
+ } else {
|
||||
+ error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m");
|
||||
+ }
|
||||
+
|
||||
+ g_free(bitmap->data);
|
||||
+unmap_exit:
|
||||
+ g_free(unmap);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
|
||||
*/
|
||||
static int vfio_dma_unmap(VFIOContainer *container,
|
||||
- hwaddr iova, ram_addr_t size)
|
||||
+ hwaddr iova, ram_addr_t size,
|
||||
+ IOMMUTLBEntry *iotlb)
|
||||
{
|
||||
struct vfio_iommu_type1_dma_unmap unmap = {
|
||||
.argsz = sizeof(unmap),
|
||||
@@ -333,6 +417,11 @@ static int vfio_dma_unmap(VFIOContainer *container,
|
||||
.size = size,
|
||||
};
|
||||
|
||||
+ if (iotlb && container->dirty_pages_supported &&
|
||||
+ vfio_devices_all_running_and_saving(container)) {
|
||||
+ return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
|
||||
+ }
|
||||
+
|
||||
while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
|
||||
/*
|
||||
* The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
|
||||
@@ -380,7 +469,7 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
|
||||
* the VGA ROM space.
|
||||
*/
|
||||
if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
|
||||
- (errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 &&
|
||||
+ (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 &&
|
||||
ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
|
||||
return 0;
|
||||
}
|
||||
@@ -530,7 +619,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
|
||||
iotlb->addr_mask + 1, vaddr, ret);
|
||||
}
|
||||
} else {
|
||||
- ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1);
|
||||
+ ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1, iotlb);
|
||||
if (ret) {
|
||||
error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
|
||||
"0x%"HWADDR_PRIx") = %d (%m)",
|
||||
@@ -816,7 +905,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
|
||||
}
|
||||
|
||||
if (try_unmap) {
|
||||
- ret = vfio_dma_unmap(container, iova, int128_get64(llsize));
|
||||
+ ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
|
||||
if (ret) {
|
||||
error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
|
||||
"0x%"HWADDR_PRIx") = %d (%m)",
|
||||
--
|
||||
2.27.0
|
||||
|
||||
266
vfio-Add-load-state-functions-to-SaveVMHandlers.patch
Normal file
266
vfio-Add-load-state-functions-to-SaveVMHandlers.patch
Normal file
@ -0,0 +1,266 @@
|
||||
From ddef5d5257987f2f415ce41fdc482feda61aa796 Mon Sep 17 00:00:00 2001
|
||||
From: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Date: Mon, 26 Oct 2020 15:06:19 +0530
|
||||
Subject: [PATCH] vfio: Add load state functions to SaveVMHandlers
|
||||
|
||||
Sequence during _RESUMING device state:
|
||||
While data for this device is available, repeat below steps:
|
||||
a. read data_offset from where user application should write data.
|
||||
b. write data of data_size to migration region from data_offset.
|
||||
c. write data_size which indicates vendor driver that data is written in
|
||||
staging buffer.
|
||||
|
||||
For user, data is opaque. User should write data in the same order as
|
||||
received.
|
||||
|
||||
Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Reviewed-by: Neo Jia <cjia@nvidia.com>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Reviewed-by: Yan Zhao <yan.y.zhao@intel.com>
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
---
|
||||
hw/vfio/migration.c | 195 +++++++++++++++++++++++++++++++++++++++++++
|
||||
hw/vfio/trace-events | 4 +
|
||||
2 files changed, 199 insertions(+)
|
||||
|
||||
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
|
||||
index f78a77e1e3..954c064435 100644
|
||||
--- a/hw/vfio/migration.c
|
||||
+++ b/hw/vfio/migration.c
|
||||
@@ -257,6 +257,77 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size)
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
|
||||
+ uint64_t data_size)
|
||||
+{
|
||||
+ VFIORegion *region = &vbasedev->migration->region;
|
||||
+ uint64_t data_offset = 0, size, report_size;
|
||||
+ int ret;
|
||||
+
|
||||
+ do {
|
||||
+ ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
|
||||
+ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ if (data_offset + data_size > region->size) {
|
||||
+ /*
|
||||
+ * If data_size is greater than the data section of migration region
|
||||
+ * then iterate the write buffer operation. This case can occur if
|
||||
+ * size of migration region at destination is smaller than size of
|
||||
+ * migration region at source.
|
||||
+ */
|
||||
+ report_size = size = region->size - data_offset;
|
||||
+ data_size -= size;
|
||||
+ } else {
|
||||
+ report_size = size = data_size;
|
||||
+ data_size = 0;
|
||||
+ }
|
||||
+
|
||||
+ trace_vfio_load_state_device_data(vbasedev->name, data_offset, size);
|
||||
+
|
||||
+ while (size) {
|
||||
+ void *buf;
|
||||
+ uint64_t sec_size;
|
||||
+ bool buf_alloc = false;
|
||||
+
|
||||
+ buf = get_data_section_size(region, data_offset, size, &sec_size);
|
||||
+
|
||||
+ if (!buf) {
|
||||
+ buf = g_try_malloc(sec_size);
|
||||
+ if (!buf) {
|
||||
+ error_report("%s: Error allocating buffer ", __func__);
|
||||
+ return -ENOMEM;
|
||||
+ }
|
||||
+ buf_alloc = true;
|
||||
+ }
|
||||
+
|
||||
+ qemu_get_buffer(f, buf, sec_size);
|
||||
+
|
||||
+ if (buf_alloc) {
|
||||
+ ret = vfio_mig_write(vbasedev, buf, sec_size,
|
||||
+ region->fd_offset + data_offset);
|
||||
+ g_free(buf);
|
||||
+
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+ }
|
||||
+ size -= sec_size;
|
||||
+ data_offset += sec_size;
|
||||
+ }
|
||||
+
|
||||
+ ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size),
|
||||
+ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+ } while (data_size);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int vfio_update_pending(VFIODevice *vbasedev)
|
||||
{
|
||||
VFIOMigration *migration = vbasedev->migration;
|
||||
@@ -293,6 +364,33 @@ static int vfio_save_device_config_state(QEMUFile *f, void *opaque)
|
||||
return qemu_file_get_error(f);
|
||||
}
|
||||
|
||||
+static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
|
||||
+{
|
||||
+ VFIODevice *vbasedev = opaque;
|
||||
+ uint64_t data;
|
||||
+
|
||||
+ if (vbasedev->ops && vbasedev->ops->vfio_load_config) {
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = vbasedev->ops->vfio_load_config(vbasedev, f);
|
||||
+ if (ret) {
|
||||
+ error_report("%s: Failed to load device config space",
|
||||
+ vbasedev->name);
|
||||
+ return ret;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ data = qemu_get_be64(f);
|
||||
+ if (data != VFIO_MIG_FLAG_END_OF_STATE) {
|
||||
+ error_report("%s: Failed loading device config space, "
|
||||
+ "end flag incorrect 0x%"PRIx64, vbasedev->name, data);
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ trace_vfio_load_device_config_state(vbasedev->name);
|
||||
+ return qemu_file_get_error(f);
|
||||
+}
|
||||
+
|
||||
static void vfio_migration_cleanup(VFIODevice *vbasedev)
|
||||
{
|
||||
VFIOMigration *migration = vbasedev->migration;
|
||||
@@ -483,12 +581,109 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static int vfio_load_setup(QEMUFile *f, void *opaque)
|
||||
+{
|
||||
+ VFIODevice *vbasedev = opaque;
|
||||
+ VFIOMigration *migration = vbasedev->migration;
|
||||
+ int ret = 0;
|
||||
+
|
||||
+ if (migration->region.mmaps) {
|
||||
+ ret = vfio_region_mmap(&migration->region);
|
||||
+ if (ret) {
|
||||
+ error_report("%s: Failed to mmap VFIO migration region %d: %s",
|
||||
+ vbasedev->name, migration->region.nr,
|
||||
+ strerror(-ret));
|
||||
+ error_report("%s: Falling back to slow path", vbasedev->name);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK,
|
||||
+ VFIO_DEVICE_STATE_RESUMING);
|
||||
+ if (ret) {
|
||||
+ error_report("%s: Failed to set state RESUMING", vbasedev->name);
|
||||
+ if (migration->region.mmaps) {
|
||||
+ vfio_region_unmap(&migration->region);
|
||||
+ }
|
||||
+ }
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static int vfio_load_cleanup(void *opaque)
|
||||
+{
|
||||
+ VFIODevice *vbasedev = opaque;
|
||||
+
|
||||
+ vfio_migration_cleanup(vbasedev);
|
||||
+ trace_vfio_load_cleanup(vbasedev->name);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
|
||||
+{
|
||||
+ VFIODevice *vbasedev = opaque;
|
||||
+ int ret = 0;
|
||||
+ uint64_t data;
|
||||
+
|
||||
+ data = qemu_get_be64(f);
|
||||
+ while (data != VFIO_MIG_FLAG_END_OF_STATE) {
|
||||
+
|
||||
+ trace_vfio_load_state(vbasedev->name, data);
|
||||
+
|
||||
+ switch (data) {
|
||||
+ case VFIO_MIG_FLAG_DEV_CONFIG_STATE:
|
||||
+ {
|
||||
+ ret = vfio_load_device_config_state(f, opaque);
|
||||
+ if (ret) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+ break;
|
||||
+ }
|
||||
+ case VFIO_MIG_FLAG_DEV_SETUP_STATE:
|
||||
+ {
|
||||
+ data = qemu_get_be64(f);
|
||||
+ if (data == VFIO_MIG_FLAG_END_OF_STATE) {
|
||||
+ return ret;
|
||||
+ } else {
|
||||
+ error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64,
|
||||
+ vbasedev->name, data);
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+ break;
|
||||
+ }
|
||||
+ case VFIO_MIG_FLAG_DEV_DATA_STATE:
|
||||
+ {
|
||||
+ uint64_t data_size = qemu_get_be64(f);
|
||||
+
|
||||
+ if (data_size) {
|
||||
+ ret = vfio_load_buffer(f, vbasedev, data_size);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+ }
|
||||
+ break;
|
||||
+ }
|
||||
+ default:
|
||||
+ error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data);
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ data = qemu_get_be64(f);
|
||||
+ ret = qemu_file_get_error(f);
|
||||
+ if (ret) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+ }
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
static SaveVMHandlers savevm_vfio_handlers = {
|
||||
.save_setup = vfio_save_setup,
|
||||
.save_cleanup = vfio_save_cleanup,
|
||||
.save_live_pending = vfio_save_pending,
|
||||
.save_live_iterate = vfio_save_iterate,
|
||||
.save_live_complete_precopy = vfio_save_complete_precopy,
|
||||
+ .load_setup = vfio_load_setup,
|
||||
+ .load_cleanup = vfio_load_cleanup,
|
||||
+ .load_state = vfio_load_state,
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
|
||||
index 9a1c5e17d9..4f08f5a633 100644
|
||||
--- a/hw/vfio/trace-events
|
||||
+++ b/hw/vfio/trace-events
|
||||
@@ -157,3 +157,7 @@ vfio_save_device_config_state(const char *name) " (%s)"
|
||||
vfio_save_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64
|
||||
vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d"
|
||||
vfio_save_complete_precopy(const char *name) " (%s)"
|
||||
+vfio_load_device_config_state(const char *name) " (%s)"
|
||||
+vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64
|
||||
+vfio_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64
|
||||
+vfio_load_cleanup(const char *name) " (%s)"
|
||||
--
|
||||
2.27.0
|
||||
|
||||
209
vfio-Add-migration-region-initialization-and-finaliz.patch
Normal file
209
vfio-Add-migration-region-initialization-and-finaliz.patch
Normal file
@ -0,0 +1,209 @@
|
||||
From b7128f8aa03482634c07691cef69e7ed2d35200e Mon Sep 17 00:00:00 2001
|
||||
From: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Date: Mon, 26 Oct 2020 15:06:14 +0530
|
||||
Subject: [PATCH] vfio: Add migration region initialization and finalize
|
||||
function
|
||||
|
||||
Whether the VFIO device supports migration or not is decided based on
|
||||
migration region query. If migration region query is successful and migration
|
||||
region initialization is successful then migration is supported else
|
||||
migration is blocked.
|
||||
|
||||
Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Reviewed-by: Neo Jia <cjia@nvidia.com>
|
||||
Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
Signed-off-by: Shenming Lu <lushenming@huawei.com>
|
||||
---
|
||||
hw/vfio/Makefile.objs | 2 +-
|
||||
hw/vfio/migration.c | 122 ++++++++++++++++++++++++++++++++++
|
||||
hw/vfio/trace-events | 3 +
|
||||
include/hw/vfio/vfio-common.h | 9 +++
|
||||
4 files changed, 135 insertions(+), 1 deletion(-)
|
||||
create mode 100644 hw/vfio/migration.c
|
||||
|
||||
diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs
|
||||
index abad8b818c..36033d1437 100644
|
||||
--- a/hw/vfio/Makefile.objs
|
||||
+++ b/hw/vfio/Makefile.objs
|
||||
@@ -1,4 +1,4 @@
|
||||
-obj-y += common.o spapr.o
|
||||
+obj-y += common.o spapr.o migration.o
|
||||
obj-$(CONFIG_VFIO_PCI) += pci.o pci-quirks.o display.o
|
||||
obj-$(CONFIG_VFIO_CCW) += ccw.o
|
||||
obj-$(CONFIG_VFIO_PLATFORM) += platform.o
|
||||
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
|
||||
new file mode 100644
|
||||
index 0000000000..fd7faf423c
|
||||
--- /dev/null
|
||||
+++ b/hw/vfio/migration.c
|
||||
@@ -0,0 +1,122 @@
|
||||
+/*
|
||||
+ * Migration support for VFIO devices
|
||||
+ *
|
||||
+ * Copyright NVIDIA, Inc. 2020
|
||||
+ *
|
||||
+ * This work is licensed under the terms of the GNU GPL, version 2. See
|
||||
+ * the COPYING file in the top-level directory.
|
||||
+ */
|
||||
+
|
||||
+#include "qemu/osdep.h"
|
||||
+#include <linux/vfio.h>
|
||||
+
|
||||
+#include "hw/vfio/vfio-common.h"
|
||||
+#include "cpu.h"
|
||||
+#include "migration/migration.h"
|
||||
+#include "migration/qemu-file.h"
|
||||
+#include "migration/register.h"
|
||||
+#include "migration/blocker.h"
|
||||
+#include "migration/misc.h"
|
||||
+#include "qapi/error.h"
|
||||
+#include "exec/ramlist.h"
|
||||
+#include "exec/ram_addr.h"
|
||||
+#include "pci.h"
|
||||
+#include "trace.h"
|
||||
+
|
||||
+static void vfio_migration_exit(VFIODevice *vbasedev)
|
||||
+{
|
||||
+ VFIOMigration *migration = vbasedev->migration;
|
||||
+
|
||||
+ vfio_region_exit(&migration->region);
|
||||
+ vfio_region_finalize(&migration->region);
|
||||
+ g_free(vbasedev->migration);
|
||||
+ vbasedev->migration = NULL;
|
||||
+}
|
||||
+
|
||||
+static int vfio_migration_init(VFIODevice *vbasedev,
|
||||
+ struct vfio_region_info *info)
|
||||
+{
|
||||
+ int ret;
|
||||
+ Object *obj;
|
||||
+
|
||||
+ if (!vbasedev->ops->vfio_get_object) {
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ obj = vbasedev->ops->vfio_get_object(vbasedev);
|
||||
+ if (!obj) {
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ vbasedev->migration = g_new0(VFIOMigration, 1);
|
||||
+
|
||||
+ ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region,
|
||||
+ info->index, "migration");
|
||||
+ if (ret) {
|
||||
+ error_report("%s: Failed to setup VFIO migration region %d: %s",
|
||||
+ vbasedev->name, info->index, strerror(-ret));
|
||||
+ goto err;
|
||||
+ }
|
||||
+
|
||||
+ if (!vbasedev->migration->region.size) {
|
||||
+ error_report("%s: Invalid zero-sized VFIO migration region %d",
|
||||
+ vbasedev->name, info->index);
|
||||
+ ret = -EINVAL;
|
||||
+ goto err;
|
||||
+ }
|
||||
+ return 0;
|
||||
+
|
||||
+err:
|
||||
+ vfio_migration_exit(vbasedev);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+/* ---------------------------------------------------------------------- */
|
||||
+
|
||||
+int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
|
||||
+{
|
||||
+ struct vfio_region_info *info = NULL;
|
||||
+ Error *local_err = NULL;
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = vfio_get_dev_region_info(vbasedev, VFIO_REGION_TYPE_MIGRATION,
|
||||
+ VFIO_REGION_SUBTYPE_MIGRATION, &info);
|
||||
+ if (ret) {
|
||||
+ goto add_blocker;
|
||||
+ }
|
||||
+
|
||||
+ ret = vfio_migration_init(vbasedev, info);
|
||||
+ if (ret) {
|
||||
+ goto add_blocker;
|
||||
+ }
|
||||
+
|
||||
+ g_free(info);
|
||||
+ trace_vfio_migration_probe(vbasedev->name, info->index);
|
||||
+ return 0;
|
||||
+
|
||||
+add_blocker:
|
||||
+ error_setg(&vbasedev->migration_blocker,
|
||||
+ "VFIO device doesn't support migration");
|
||||
+ g_free(info);
|
||||
+
|
||||
+ ret = migrate_add_blocker(vbasedev->migration_blocker, &local_err);
|
||||
+ if (local_err) {
|
||||
+ error_propagate(errp, local_err);
|
||||
+ error_free(vbasedev->migration_blocker);
|
||||
+ vbasedev->migration_blocker = NULL;
|
||||
+ }
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+void vfio_migration_finalize(VFIODevice *vbasedev)
|
||||
+{
|
||||
+ if (vbasedev->migration) {
|
||||
+ vfio_migration_exit(vbasedev);
|
||||
+ }
|
||||
+
|
||||
+ if (vbasedev->migration_blocker) {
|
||||
+ migrate_del_blocker(vbasedev->migration_blocker);
|
||||
+ error_free(vbasedev->migration_blocker);
|
||||
+ vbasedev->migration_blocker = NULL;
|
||||
+ }
|
||||
+}
|
||||
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
|
||||
index 8cdc27946c..fd034ac536 100644
|
||||
--- a/hw/vfio/trace-events
|
||||
+++ b/hw/vfio/trace-events
|
||||
@@ -143,3 +143,6 @@ vfio_display_edid_link_up(void) ""
|
||||
vfio_display_edid_link_down(void) ""
|
||||
vfio_display_edid_update(uint32_t prefx, uint32_t prefy) "%ux%u"
|
||||
vfio_display_edid_write_error(void) ""
|
||||
+
|
||||
+# migration.c
|
||||
+vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d"
|
||||
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||
index 6ea4898c4d..e0482c2bac 100644
|
||||
--- a/include/hw/vfio/vfio-common.h
|
||||
+++ b/include/hw/vfio/vfio-common.h
|
||||
@@ -57,6 +57,10 @@ typedef struct VFIORegion {
|
||||
uint8_t nr; /* cache the region number for debug */
|
||||
} VFIORegion;
|
||||
|
||||
+typedef struct VFIOMigration {
|
||||
+ VFIORegion region;
|
||||
+} VFIOMigration;
|
||||
+
|
||||
typedef struct VFIOAddressSpace {
|
||||
AddressSpace *as;
|
||||
QLIST_HEAD(, VFIOContainer) containers;
|
||||
@@ -113,6 +117,8 @@ typedef struct VFIODevice {
|
||||
unsigned int num_irqs;
|
||||
unsigned int num_regions;
|
||||
unsigned int flags;
|
||||
+ VFIOMigration *migration;
|
||||
+ Error *migration_blocker;
|
||||
} VFIODevice;
|
||||
|
||||
struct VFIODeviceOps {
|
||||
@@ -204,4 +210,7 @@ int vfio_spapr_create_window(VFIOContainer *container,
|
||||
int vfio_spapr_remove_window(VFIOContainer *container,
|
||||
hwaddr offset_within_address_space);
|
||||
|
||||
+int vfio_migration_probe(VFIODevice *vbasedev, Error **errp);
|
||||
+void vfio_migration_finalize(VFIODevice *vbasedev);
|
||||
+
|
||||
#endif /* HW_VFIO_VFIO_COMMON_H */
|
||||
--
|
||||
2.27.0
|
||||
|
||||
104
vfio-Add-migration-state-change-notifier.patch
Normal file
104
vfio-Add-migration-state-change-notifier.patch
Normal file
@ -0,0 +1,104 @@
|
||||
From b61729a5e0ab89d29f041202b50d042405076e62 Mon Sep 17 00:00:00 2001
|
||||
From: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Date: Mon, 26 Oct 2020 15:06:16 +0530
|
||||
Subject: [PATCH] vfio: Add migration state change notifier
|
||||
|
||||
Added migration state change notifier to get notification on migration state
|
||||
change. These states are translated to VFIO device state and conveyed to
|
||||
vendor driver.
|
||||
|
||||
Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Reviewed-by: Neo Jia <cjia@nvidia.com>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
---
|
||||
hw/vfio/migration.c | 28 ++++++++++++++++++++++++++++
|
||||
hw/vfio/trace-events | 1 +
|
||||
include/hw/vfio/vfio-common.h | 2 ++
|
||||
3 files changed, 31 insertions(+)
|
||||
|
||||
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
|
||||
index ca82c78536..0c6c9b655f 100644
|
||||
--- a/hw/vfio/migration.c
|
||||
+++ b/hw/vfio/migration.c
|
||||
@@ -175,6 +175,30 @@ static void vfio_vmstate_change(void *opaque, int running, RunState state)
|
||||
(migration->device_state & mask) | value);
|
||||
}
|
||||
|
||||
+static void vfio_migration_state_notifier(Notifier *notifier, void *data)
|
||||
+{
|
||||
+ MigrationState *s = data;
|
||||
+ VFIOMigration *migration = container_of(notifier, VFIOMigration,
|
||||
+ migration_state);
|
||||
+ VFIODevice *vbasedev = migration->vbasedev;
|
||||
+ int ret;
|
||||
+
|
||||
+ trace_vfio_migration_state_notifier(vbasedev->name,
|
||||
+ MigrationStatus_str(s->state));
|
||||
+
|
||||
+ switch (s->state) {
|
||||
+ case MIGRATION_STATUS_CANCELLING:
|
||||
+ case MIGRATION_STATUS_CANCELLED:
|
||||
+ case MIGRATION_STATUS_FAILED:
|
||||
+ ret = vfio_migration_set_state(vbasedev,
|
||||
+ ~(VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RESUMING),
|
||||
+ VFIO_DEVICE_STATE_RUNNING);
|
||||
+ if (ret) {
|
||||
+ error_report("%s: Failed to set state RUNNING", vbasedev->name);
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static void vfio_migration_exit(VFIODevice *vbasedev)
|
||||
{
|
||||
VFIOMigration *migration = vbasedev->migration;
|
||||
@@ -219,8 +243,11 @@ static int vfio_migration_init(VFIODevice *vbasedev,
|
||||
}
|
||||
|
||||
migration = vbasedev->migration;
|
||||
+ migration->vbasedev = vbasedev;
|
||||
migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change,
|
||||
vbasedev);
|
||||
+ migration->migration_state.notify = vfio_migration_state_notifier;
|
||||
+ add_migration_state_change_notifier(&migration->migration_state);
|
||||
return 0;
|
||||
|
||||
err:
|
||||
@@ -270,6 +297,7 @@ void vfio_migration_finalize(VFIODevice *vbasedev)
|
||||
if (vbasedev->migration) {
|
||||
VFIOMigration *migration = vbasedev->migration;
|
||||
|
||||
+ remove_migration_state_change_notifier(&migration->migration_state);
|
||||
qemu_del_vm_change_state_handler(migration->vm_state);
|
||||
vfio_migration_exit(vbasedev);
|
||||
}
|
||||
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
|
||||
index 1626862315..bd3d47b005 100644
|
||||
--- a/hw/vfio/trace-events
|
||||
+++ b/hw/vfio/trace-events
|
||||
@@ -148,3 +148,4 @@ vfio_display_edid_write_error(void) ""
|
||||
vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d"
|
||||
vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d"
|
||||
vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d"
|
||||
+vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s"
|
||||
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||
index 533d6737ac..efff0590ae 100644
|
||||
--- a/include/hw/vfio/vfio-common.h
|
||||
+++ b/include/hw/vfio/vfio-common.h
|
||||
@@ -59,10 +59,12 @@ typedef struct VFIORegion {
|
||||
} VFIORegion;
|
||||
|
||||
typedef struct VFIOMigration {
|
||||
+ struct VFIODevice *vbasedev;
|
||||
VMChangeStateEntry *vm_state;
|
||||
VFIORegion region;
|
||||
uint32_t device_state;
|
||||
int vm_running;
|
||||
+ Notifier migration_state;
|
||||
} VFIOMigration;
|
||||
|
||||
typedef struct VFIOAddressSpace {
|
||||
--
|
||||
2.27.0
|
||||
|
||||
106
vfio-Add-save-and-load-functions-for-VFIO-PCI-device.patch
Normal file
106
vfio-Add-save-and-load-functions-for-VFIO-PCI-device.patch
Normal file
@ -0,0 +1,106 @@
|
||||
From 92f104ca6e35acae079ca3bb432f24452058d483 Mon Sep 17 00:00:00 2001
|
||||
From: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Date: Mon, 26 Oct 2020 15:06:13 +0530
|
||||
Subject: [PATCH] vfio: Add save and load functions for VFIO PCI devices
|
||||
|
||||
Added functions to save and restore PCI device specific data,
|
||||
specifically config space of PCI device.
|
||||
|
||||
Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Reviewed-by: Neo Jia <cjia@nvidia.com>
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
---
|
||||
hw/vfio/pci.c | 51 +++++++++++++++++++++++++++++++++++
|
||||
include/hw/vfio/vfio-common.h | 2 ++
|
||||
2 files changed, 53 insertions(+)
|
||||
|
||||
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
|
||||
index de0d286fc9..b9fae3ad28 100644
|
||||
--- a/hw/vfio/pci.c
|
||||
+++ b/hw/vfio/pci.c
|
||||
@@ -35,6 +35,7 @@
|
||||
#include "pci.h"
|
||||
#include "trace.h"
|
||||
#include "qapi/error.h"
|
||||
+#include "migration/qemu-file.h"
|
||||
|
||||
#define TYPE_VFIO_PCI "vfio-pci"
|
||||
#define PCI_VFIO(obj) OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI)
|
||||
@@ -2395,11 +2396,61 @@ static Object *vfio_pci_get_object(VFIODevice *vbasedev)
|
||||
return OBJECT(vdev);
|
||||
}
|
||||
|
||||
+static bool vfio_msix_present(void *opaque, int version_id)
|
||||
+{
|
||||
+ PCIDevice *pdev = opaque;
|
||||
+
|
||||
+ return msix_present(pdev);
|
||||
+}
|
||||
+
|
||||
+const VMStateDescription vmstate_vfio_pci_config = {
|
||||
+ .name = "VFIOPCIDevice",
|
||||
+ .version_id = 1,
|
||||
+ .minimum_version_id = 1,
|
||||
+ .fields = (VMStateField[]) {
|
||||
+ VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice),
|
||||
+ VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, vfio_msix_present),
|
||||
+ VMSTATE_END_OF_LIST()
|
||||
+ }
|
||||
+};
|
||||
+
|
||||
+static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f)
|
||||
+{
|
||||
+ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
|
||||
+
|
||||
+ vmstate_save_state(f, &vmstate_vfio_pci_config, vdev, NULL);
|
||||
+}
|
||||
+
|
||||
+static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
|
||||
+{
|
||||
+ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
|
||||
+ PCIDevice *pdev = &vdev->pdev;
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = vmstate_load_state(f, &vmstate_vfio_pci_config, vdev, 1);
|
||||
+ if (ret) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ vfio_pci_write_config(pdev, PCI_COMMAND,
|
||||
+ pci_get_word(pdev->config + PCI_COMMAND), 2);
|
||||
+
|
||||
+ if (msi_enabled(pdev)) {
|
||||
+ vfio_msi_enable(vdev);
|
||||
+ } else if (msix_enabled(pdev)) {
|
||||
+ vfio_msix_enable(vdev);
|
||||
+ }
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
static VFIODeviceOps vfio_pci_ops = {
|
||||
.vfio_compute_needs_reset = vfio_pci_compute_needs_reset,
|
||||
.vfio_hot_reset_multi = vfio_pci_hot_reset_multi,
|
||||
.vfio_eoi = vfio_intx_eoi,
|
||||
.vfio_get_object = vfio_pci_get_object,
|
||||
+ .vfio_save_config = vfio_pci_save_config,
|
||||
+ .vfio_load_config = vfio_pci_load_config,
|
||||
};
|
||||
|
||||
int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
|
||||
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||
index 771b6d59a3..6ea4898c4d 100644
|
||||
--- a/include/hw/vfio/vfio-common.h
|
||||
+++ b/include/hw/vfio/vfio-common.h
|
||||
@@ -120,6 +120,8 @@ struct VFIODeviceOps {
|
||||
int (*vfio_hot_reset_multi)(VFIODevice *vdev);
|
||||
void (*vfio_eoi)(VFIODevice *vdev);
|
||||
Object *(*vfio_get_object)(VFIODevice *vdev);
|
||||
+ void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f);
|
||||
+ int (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f);
|
||||
};
|
||||
|
||||
typedef struct VFIOGroup {
|
||||
--
|
||||
2.27.0
|
||||
|
||||
380
vfio-Add-save-state-functions-to-SaveVMHandlers.patch
Normal file
380
vfio-Add-save-state-functions-to-SaveVMHandlers.patch
Normal file
@ -0,0 +1,380 @@
|
||||
From 94f106f95e887d1d706e8f771fd6ad287ddac2dc Mon Sep 17 00:00:00 2001
|
||||
From: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Date: Mon, 26 Oct 2020 15:06:18 +0530
|
||||
Subject: [PATCH] vfio: Add save state functions to SaveVMHandlers
|
||||
|
||||
Added .save_live_pending, .save_live_iterate and .save_live_complete_precopy
|
||||
functions. These functions handle the pre-copy and stop-and-copy phases.
|
||||
|
||||
In _SAVING|_RUNNING device state or pre-copy phase:
|
||||
- read pending_bytes. If pending_bytes > 0, go through below steps.
|
||||
- read data_offset - signals the kernel driver to write data to the staging
|
||||
buffer.
|
||||
- read data_size - amount of data in bytes written by vendor driver in
|
||||
migration region.
|
||||
- read data_size bytes of data from data_offset in the migration region.
|
||||
- Write data packet to file stream as below:
|
||||
{VFIO_MIG_FLAG_DEV_DATA_STATE, data_size, actual data,
|
||||
VFIO_MIG_FLAG_END_OF_STATE }
|
||||
|
||||
In _SAVING device state or stop-and-copy phase
|
||||
a. read config space of device and save to migration file stream. This
|
||||
doesn't need to be from vendor driver. Any other special config state
|
||||
from driver can be saved as data in following iteration.
|
||||
b. read pending_bytes. If pending_bytes > 0, go through below steps.
|
||||
c. read data_offset - signals the kernel driver to write data to the staging
|
||||
buffer.
|
||||
d. read data_size - amount of data in bytes written by vendor driver in
|
||||
migration region.
|
||||
e. read data_size bytes of data from data_offset in the migration region.
|
||||
f. Write data packet as below:
|
||||
{VFIO_MIG_FLAG_DEV_DATA_STATE, data_size, actual data}
|
||||
g. iterate through steps b to f while (pending_bytes > 0)
|
||||
h. Write {VFIO_MIG_FLAG_END_OF_STATE}
|
||||
|
||||
When the data region is mapped, it is the user's responsibility to read
|
||||
data_size bytes from data_offset before moving to the next steps.
|
||||
|
||||
Added fix suggested by Artem Polyakov to reset pending_bytes in
|
||||
vfio_save_iterate().
|
||||
Added fix suggested by Zhi Wang to add 0 as data size in migration stream and
|
||||
add END_OF_STATE delimiter to indicate phase complete.
|
||||
|
||||
Suggested-by: Artem Polyakov <artemp@nvidia.com>
|
||||
Suggested-by: Zhi Wang <zhi.wang.linux@gmail.com>
|
||||
Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Reviewed-by: Neo Jia <cjia@nvidia.com>
|
||||
Reviewed-by: Yan Zhao <yan.y.zhao@intel.com>
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
---
|
||||
hw/vfio/migration.c | 276 ++++++++++++++++++++++++++++++++++
|
||||
hw/vfio/trace-events | 6 +
|
||||
include/hw/vfio/vfio-common.h | 1 +
|
||||
3 files changed, 283 insertions(+)
|
||||
|
||||
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
|
||||
index 405228fc5a..f78a77e1e3 100644
|
||||
--- a/hw/vfio/migration.c
|
||||
+++ b/hw/vfio/migration.c
|
||||
@@ -148,6 +148,151 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static void *get_data_section_size(VFIORegion *region, uint64_t data_offset,
|
||||
+ uint64_t data_size, uint64_t *size)
|
||||
+{
|
||||
+ void *ptr = NULL;
|
||||
+ uint64_t limit = 0;
|
||||
+ int i;
|
||||
+
|
||||
+ if (!region->mmaps) {
|
||||
+ if (size) {
|
||||
+ *size = MIN(data_size, region->size - data_offset);
|
||||
+ }
|
||||
+ return ptr;
|
||||
+ }
|
||||
+
|
||||
+ for (i = 0; i < region->nr_mmaps; i++) {
|
||||
+ VFIOMmap *map = region->mmaps + i;
|
||||
+
|
||||
+ if ((data_offset >= map->offset) &&
|
||||
+ (data_offset < map->offset + map->size)) {
|
||||
+
|
||||
+ /* check if data_offset is within sparse mmap areas */
|
||||
+ ptr = map->mmap + data_offset - map->offset;
|
||||
+ if (size) {
|
||||
+ *size = MIN(data_size, map->offset + map->size - data_offset);
|
||||
+ }
|
||||
+ break;
|
||||
+ } else if ((data_offset < map->offset) &&
|
||||
+ (!limit || limit > map->offset)) {
|
||||
+ /*
|
||||
+ * data_offset is not within sparse mmap areas, find size of
|
||||
+ * non-mapped area. Check through all list since region->mmaps list
|
||||
+ * is not sorted.
|
||||
+ */
|
||||
+ limit = map->offset;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (!ptr && size) {
|
||||
+ *size = limit ? MIN(data_size, limit - data_offset) : data_size;
|
||||
+ }
|
||||
+ return ptr;
|
||||
+}
|
||||
+
|
||||
+static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size)
|
||||
+{
|
||||
+ VFIOMigration *migration = vbasedev->migration;
|
||||
+ VFIORegion *region = &migration->region;
|
||||
+ uint64_t data_offset = 0, data_size = 0, sz;
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
|
||||
+ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size),
|
||||
+ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ trace_vfio_save_buffer(vbasedev->name, data_offset, data_size,
|
||||
+ migration->pending_bytes);
|
||||
+
|
||||
+ qemu_put_be64(f, data_size);
|
||||
+ sz = data_size;
|
||||
+
|
||||
+ while (sz) {
|
||||
+ void *buf;
|
||||
+ uint64_t sec_size;
|
||||
+ bool buf_allocated = false;
|
||||
+
|
||||
+ buf = get_data_section_size(region, data_offset, sz, &sec_size);
|
||||
+
|
||||
+ if (!buf) {
|
||||
+ buf = g_try_malloc(sec_size);
|
||||
+ if (!buf) {
|
||||
+ error_report("%s: Error allocating buffer ", __func__);
|
||||
+ return -ENOMEM;
|
||||
+ }
|
||||
+ buf_allocated = true;
|
||||
+
|
||||
+ ret = vfio_mig_read(vbasedev, buf, sec_size,
|
||||
+ region->fd_offset + data_offset);
|
||||
+ if (ret < 0) {
|
||||
+ g_free(buf);
|
||||
+ return ret;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ qemu_put_buffer(f, buf, sec_size);
|
||||
+
|
||||
+ if (buf_allocated) {
|
||||
+ g_free(buf);
|
||||
+ }
|
||||
+ sz -= sec_size;
|
||||
+ data_offset += sec_size;
|
||||
+ }
|
||||
+
|
||||
+ ret = qemu_file_get_error(f);
|
||||
+
|
||||
+ if (!ret && size) {
|
||||
+ *size = data_size;
|
||||
+ }
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static int vfio_update_pending(VFIODevice *vbasedev)
|
||||
+{
|
||||
+ VFIOMigration *migration = vbasedev->migration;
|
||||
+ VFIORegion *region = &migration->region;
|
||||
+ uint64_t pending_bytes = 0;
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes),
|
||||
+ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes));
|
||||
+ if (ret < 0) {
|
||||
+ migration->pending_bytes = 0;
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ migration->pending_bytes = pending_bytes;
|
||||
+ trace_vfio_update_pending(vbasedev->name, pending_bytes);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int vfio_save_device_config_state(QEMUFile *f, void *opaque)
|
||||
+{
|
||||
+ VFIODevice *vbasedev = opaque;
|
||||
+
|
||||
+ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_STATE);
|
||||
+
|
||||
+ if (vbasedev->ops && vbasedev->ops->vfio_save_config) {
|
||||
+ vbasedev->ops->vfio_save_config(vbasedev, f);
|
||||
+ }
|
||||
+
|
||||
+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
|
||||
+
|
||||
+ trace_vfio_save_device_config_state(vbasedev->name);
|
||||
+
|
||||
+ return qemu_file_get_error(f);
|
||||
+}
|
||||
+
|
||||
static void vfio_migration_cleanup(VFIODevice *vbasedev)
|
||||
{
|
||||
VFIOMigration *migration = vbasedev->migration;
|
||||
@@ -210,9 +355,140 @@ static void vfio_save_cleanup(void *opaque)
|
||||
trace_vfio_save_cleanup(vbasedev->name);
|
||||
}
|
||||
|
||||
+static void vfio_save_pending(QEMUFile *f, void *opaque,
|
||||
+ uint64_t threshold_size,
|
||||
+ uint64_t *res_precopy_only,
|
||||
+ uint64_t *res_compatible,
|
||||
+ uint64_t *res_postcopy_only)
|
||||
+{
|
||||
+ VFIODevice *vbasedev = opaque;
|
||||
+ VFIOMigration *migration = vbasedev->migration;
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = vfio_update_pending(vbasedev);
|
||||
+ if (ret) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ *res_precopy_only += migration->pending_bytes;
|
||||
+
|
||||
+ trace_vfio_save_pending(vbasedev->name, *res_precopy_only,
|
||||
+ *res_postcopy_only, *res_compatible);
|
||||
+}
|
||||
+
|
||||
+static int vfio_save_iterate(QEMUFile *f, void *opaque)
|
||||
+{
|
||||
+ VFIODevice *vbasedev = opaque;
|
||||
+ VFIOMigration *migration = vbasedev->migration;
|
||||
+ uint64_t data_size;
|
||||
+ int ret;
|
||||
+
|
||||
+ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
|
||||
+
|
||||
+ if (migration->pending_bytes == 0) {
|
||||
+ ret = vfio_update_pending(vbasedev);
|
||||
+ if (ret) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ if (migration->pending_bytes == 0) {
|
||||
+ qemu_put_be64(f, 0);
|
||||
+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
|
||||
+ /* indicates data finished, goto complete phase */
|
||||
+ return 1;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ ret = vfio_save_buffer(f, vbasedev, &data_size);
|
||||
+ if (ret) {
|
||||
+ error_report("%s: vfio_save_buffer failed %s", vbasedev->name,
|
||||
+ strerror(errno));
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
|
||||
+
|
||||
+ ret = qemu_file_get_error(f);
|
||||
+ if (ret) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * Reset pending_bytes as .save_live_pending is not called during savevm or
|
||||
+ * snapshot case, in such case vfio_update_pending() at the start of this
|
||||
+ * function updates pending_bytes.
|
||||
+ */
|
||||
+ migration->pending_bytes = 0;
|
||||
+ trace_vfio_save_iterate(vbasedev->name, data_size);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
|
||||
+{
|
||||
+ VFIODevice *vbasedev = opaque;
|
||||
+ VFIOMigration *migration = vbasedev->migration;
|
||||
+ uint64_t data_size;
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_RUNNING,
|
||||
+ VFIO_DEVICE_STATE_SAVING);
|
||||
+ if (ret) {
|
||||
+ error_report("%s: Failed to set state STOP and SAVING",
|
||||
+ vbasedev->name);
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ ret = vfio_save_device_config_state(f, opaque);
|
||||
+ if (ret) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ ret = vfio_update_pending(vbasedev);
|
||||
+ if (ret) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ while (migration->pending_bytes > 0) {
|
||||
+ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
|
||||
+ ret = vfio_save_buffer(f, vbasedev, &data_size);
|
||||
+ if (ret < 0) {
|
||||
+ error_report("%s: Failed to save buffer", vbasedev->name);
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ if (data_size == 0) {
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ ret = vfio_update_pending(vbasedev);
|
||||
+ if (ret) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
|
||||
+
|
||||
+ ret = qemu_file_get_error(f);
|
||||
+ if (ret) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_SAVING, 0);
|
||||
+ if (ret) {
|
||||
+ error_report("%s: Failed to set state STOPPED", vbasedev->name);
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ trace_vfio_save_complete_precopy(vbasedev->name);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
static SaveVMHandlers savevm_vfio_handlers = {
|
||||
.save_setup = vfio_save_setup,
|
||||
.save_cleanup = vfio_save_cleanup,
|
||||
+ .save_live_pending = vfio_save_pending,
|
||||
+ .save_live_iterate = vfio_save_iterate,
|
||||
+ .save_live_complete_precopy = vfio_save_complete_precopy,
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
|
||||
index 86c18def01..9a1c5e17d9 100644
|
||||
--- a/hw/vfio/trace-events
|
||||
+++ b/hw/vfio/trace-events
|
||||
@@ -151,3 +151,9 @@ vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t
|
||||
vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s"
|
||||
vfio_save_setup(const char *name) " (%s)"
|
||||
vfio_save_cleanup(const char *name) " (%s)"
|
||||
+vfio_save_buffer(const char *name, uint64_t data_offset, uint64_t data_size, uint64_t pending) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64" pending 0x%"PRIx64
|
||||
+vfio_update_pending(const char *name, uint64_t pending) " (%s) pending 0x%"PRIx64
|
||||
+vfio_save_device_config_state(const char *name) " (%s)"
|
||||
+vfio_save_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64
|
||||
+vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d"
|
||||
+vfio_save_complete_precopy(const char *name) " (%s)"
|
||||
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||
index efff0590ae..c825524606 100644
|
||||
--- a/include/hw/vfio/vfio-common.h
|
||||
+++ b/include/hw/vfio/vfio-common.h
|
||||
@@ -65,6 +65,7 @@ typedef struct VFIOMigration {
|
||||
uint32_t device_state;
|
||||
int vm_running;
|
||||
Notifier migration_state;
|
||||
+ uint64_t pending_bytes;
|
||||
} VFIOMigration;
|
||||
|
||||
typedef struct VFIOAddressSpace {
|
||||
--
|
||||
2.27.0
|
||||
|
||||
55
vfio-Add-vfio_get_object-callback-to-VFIODeviceOps.patch
Normal file
55
vfio-Add-vfio_get_object-callback-to-VFIODeviceOps.patch
Normal file
@ -0,0 +1,55 @@
|
||||
From c1de789d89132b66243fbfe253f10764ce514a08 Mon Sep 17 00:00:00 2001
|
||||
From: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Date: Mon, 26 Oct 2020 15:06:12 +0530
|
||||
Subject: [PATCH] vfio: Add vfio_get_object callback to VFIODeviceOps
|
||||
|
||||
Hook vfio_get_object callback for PCI devices.
|
||||
|
||||
Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Reviewed-by: Neo Jia <cjia@nvidia.com>
|
||||
Suggested-by: Cornelia Huck <cohuck@redhat.com>
|
||||
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
---
|
||||
hw/vfio/pci.c | 8 ++++++++
|
||||
include/hw/vfio/vfio-common.h | 1 +
|
||||
2 files changed, 9 insertions(+)
|
||||
|
||||
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
|
||||
index d7a4e1875c..de0d286fc9 100644
|
||||
--- a/hw/vfio/pci.c
|
||||
+++ b/hw/vfio/pci.c
|
||||
@@ -2388,10 +2388,18 @@ static void vfio_pci_compute_needs_reset(VFIODevice *vbasedev)
|
||||
}
|
||||
}
|
||||
|
||||
+static Object *vfio_pci_get_object(VFIODevice *vbasedev)
|
||||
+{
|
||||
+ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
|
||||
+
|
||||
+ return OBJECT(vdev);
|
||||
+}
|
||||
+
|
||||
static VFIODeviceOps vfio_pci_ops = {
|
||||
.vfio_compute_needs_reset = vfio_pci_compute_needs_reset,
|
||||
.vfio_hot_reset_multi = vfio_pci_hot_reset_multi,
|
||||
.vfio_eoi = vfio_intx_eoi,
|
||||
+ .vfio_get_object = vfio_pci_get_object,
|
||||
};
|
||||
|
||||
int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
|
||||
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||
index 93493891ba..771b6d59a3 100644
|
||||
--- a/include/hw/vfio/vfio-common.h
|
||||
+++ b/include/hw/vfio/vfio-common.h
|
||||
@@ -119,6 +119,7 @@ struct VFIODeviceOps {
|
||||
void (*vfio_compute_needs_reset)(VFIODevice *vdev);
|
||||
int (*vfio_hot_reset_multi)(VFIODevice *vdev);
|
||||
void (*vfio_eoi)(VFIODevice *vdev);
|
||||
+ Object *(*vfio_get_object)(VFIODevice *vdev);
|
||||
};
|
||||
|
||||
typedef struct VFIOGroup {
|
||||
--
|
||||
2.27.0
|
||||
|
||||
182
vfio-Add-vfio_listener_log_sync-to-mark-dirty-pages.patch
Normal file
182
vfio-Add-vfio_listener_log_sync-to-mark-dirty-pages.patch
Normal file
@ -0,0 +1,182 @@
|
||||
From 3ac0647003d192579bcb6c1081b75d9c8ada78e0 Mon Sep 17 00:00:00 2001
|
||||
From: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Date: Mon, 26 Oct 2020 15:06:23 +0530
|
||||
Subject: [PATCH] vfio: Add vfio_listener_log_sync to mark dirty pages
|
||||
|
||||
vfio_listener_log_sync gets the list of dirty pages from the container
|
||||
using the VFIO_IOMMU_DIRTY_PAGES ioctl (GET_BITMAP flag) and marks those
|
||||
pages dirty when all devices are stopped and saving state.
|
||||
Return early for the RAM block section of mapped MMIO region.
|
||||
|
||||
Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Reviewed-by: Neo Jia <cjia@nvidia.com>
|
||||
[aw: fix error_report types, fix cpu_physical_memory_set_dirty_lebitmap() cast]
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
---
|
||||
hw/vfio/common.c | 116 +++++++++++++++++++++++++++++++++++++++++++
|
||||
hw/vfio/trace-events | 1 +
|
||||
2 files changed, 117 insertions(+)
|
||||
|
||||
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||
index 35168b8f3e..4d2828fc97 100644
|
||||
--- a/hw/vfio/common.c
|
||||
+++ b/hw/vfio/common.c
|
||||
@@ -29,6 +29,7 @@
|
||||
#include "hw/vfio/vfio.h"
|
||||
#include "exec/address-spaces.h"
|
||||
#include "exec/memory.h"
|
||||
+#include "exec/ram_addr.h"
|
||||
#include "hw/hw.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "qemu/range.h"
|
||||
@@ -36,6 +37,7 @@
|
||||
#include "sysemu/kvm.h"
|
||||
#include "trace.h"
|
||||
#include "qapi/error.h"
|
||||
+#include "migration/migration.h"
|
||||
|
||||
VFIOGroupList vfio_group_list =
|
||||
QLIST_HEAD_INITIALIZER(vfio_group_list);
|
||||
@@ -285,6 +287,39 @@ const MemoryRegionOps vfio_region_ops = {
|
||||
},
|
||||
};
|
||||
|
||||
+/*
|
||||
+ * Device state interfaces
|
||||
+ */
|
||||
+
|
||||
+static bool vfio_devices_all_stopped_and_saving(VFIOContainer *container)
|
||||
+{
|
||||
+ VFIOGroup *group;
|
||||
+ VFIODevice *vbasedev;
|
||||
+ MigrationState *ms = migrate_get_current();
|
||||
+
|
||||
+ if (!migration_is_setup_or_active(ms->state)) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ QLIST_FOREACH(group, &container->group_list, container_next) {
|
||||
+ QLIST_FOREACH(vbasedev, &group->device_list, next) {
|
||||
+ VFIOMigration *migration = vbasedev->migration;
|
||||
+
|
||||
+ if (!migration) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ if ((migration->device_state & VFIO_DEVICE_STATE_SAVING) &&
|
||||
+ !(migration->device_state & VFIO_DEVICE_STATE_RUNNING)) {
|
||||
+ continue;
|
||||
+ } else {
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
|
||||
*/
|
||||
@@ -794,9 +829,90 @@ static void vfio_listener_region_del(MemoryListener *listener,
|
||||
}
|
||||
}
|
||||
|
||||
+static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
|
||||
+ uint64_t size, ram_addr_t ram_addr)
|
||||
+{
|
||||
+ struct vfio_iommu_type1_dirty_bitmap *dbitmap;
|
||||
+ struct vfio_iommu_type1_dirty_bitmap_get *range;
|
||||
+ uint64_t pages;
|
||||
+ int ret;
|
||||
+
|
||||
+ dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));
|
||||
+
|
||||
+ dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);
|
||||
+ dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP;
|
||||
+ range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data;
|
||||
+ range->iova = iova;
|
||||
+ range->size = size;
|
||||
+
|
||||
+ /*
|
||||
+ * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of
|
||||
+ * TARGET_PAGE_SIZE to mark those dirty. Hence set bitmap's pgsize to
|
||||
+ * TARGET_PAGE_SIZE.
|
||||
+ */
|
||||
+ range->bitmap.pgsize = TARGET_PAGE_SIZE;
|
||||
+
|
||||
+ pages = TARGET_PAGE_ALIGN(range->size) >> TARGET_PAGE_BITS;
|
||||
+ range->bitmap.size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
|
||||
+ BITS_PER_BYTE;
|
||||
+ range->bitmap.data = g_try_malloc0(range->bitmap.size);
|
||||
+ if (!range->bitmap.data) {
|
||||
+ ret = -ENOMEM;
|
||||
+ goto err_out;
|
||||
+ }
|
||||
+
|
||||
+ ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
|
||||
+ if (ret) {
|
||||
+ error_report("Failed to get dirty bitmap for iova: 0x%"PRIx64
|
||||
+ " size: 0x%"PRIx64" err: %d", (uint64_t)range->iova,
|
||||
+ (uint64_t)range->size, errno);
|
||||
+ goto err_out;
|
||||
+ }
|
||||
+
|
||||
+ cpu_physical_memory_set_dirty_lebitmap((unsigned long *)range->bitmap.data,
|
||||
+ ram_addr, pages);
|
||||
+
|
||||
+ trace_vfio_get_dirty_bitmap(container->fd, range->iova, range->size,
|
||||
+ range->bitmap.size, ram_addr);
|
||||
+err_out:
|
||||
+ g_free(range->bitmap.data);
|
||||
+ g_free(dbitmap);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static int vfio_sync_dirty_bitmap(VFIOContainer *container,
|
||||
+ MemoryRegionSection *section)
|
||||
+{
|
||||
+ ram_addr_t ram_addr;
|
||||
+
|
||||
+ ram_addr = memory_region_get_ram_addr(section->mr) +
|
||||
+ section->offset_within_region;
|
||||
+
|
||||
+ return vfio_get_dirty_bitmap(container,
|
||||
+ TARGET_PAGE_ALIGN(section->offset_within_address_space),
|
||||
+ int128_get64(section->size), ram_addr);
|
||||
+}
|
||||
+
|
||||
+static void vfio_listerner_log_sync(MemoryListener *listener,
|
||||
+ MemoryRegionSection *section)
|
||||
+{
|
||||
+ VFIOContainer *container = container_of(listener, VFIOContainer, listener);
|
||||
+
|
||||
+ if (vfio_listener_skipped_section(section) ||
|
||||
+ !container->dirty_pages_supported) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ if (vfio_devices_all_stopped_and_saving(container)) {
|
||||
+ vfio_sync_dirty_bitmap(container, section);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static const MemoryListener vfio_memory_listener = {
|
||||
.region_add = vfio_listener_region_add,
|
||||
.region_del = vfio_listener_region_del,
|
||||
+ .log_sync = vfio_listerner_log_sync,
|
||||
};
|
||||
|
||||
static void vfio_listener_release(VFIOContainer *container)
|
||||
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
|
||||
index 4f08f5a633..4167f35d64 100644
|
||||
--- a/hw/vfio/trace-events
|
||||
+++ b/hw/vfio/trace-events
|
||||
@@ -161,3 +161,4 @@ vfio_load_device_config_state(const char *name) " (%s)"
|
||||
vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64
|
||||
vfio_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64
|
||||
vfio_load_cleanup(const char *name) " (%s)"
|
||||
+vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64
|
||||
--
|
||||
2.27.0
|
||||
|
||||
162
vfio-Dirty-page-tracking-when-vIOMMU-is-enabled.patch
Normal file
162
vfio-Dirty-page-tracking-when-vIOMMU-is-enabled.patch
Normal file
@ -0,0 +1,162 @@
|
||||
From a400753d0f1a008367165aadf375abfe86a66ed7 Mon Sep 17 00:00:00 2001
|
||||
From: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Date: Mon, 26 Oct 2020 15:06:24 +0530
|
||||
Subject: [PATCH] vfio: Dirty page tracking when vIOMMU is enabled
|
||||
|
||||
When vIOMMU is enabled, register MAP notifier from log_sync when all
|
||||
devices in container are in stop and copy phase of migration. Call replay
|
||||
and get dirty pages from notifier callback.
|
||||
|
||||
Suggested-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Reviewed-by: Yan Zhao <yan.y.zhao@intel.com>
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
---
|
||||
hw/vfio/common.c | 88 +++++++++++++++++++++++++++++++++++++++++---
|
||||
hw/vfio/trace-events | 1 +
|
||||
2 files changed, 83 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||
index 4d2828fc97..8773b998ac 100644
|
||||
--- a/hw/vfio/common.c
|
||||
+++ b/hw/vfio/common.c
|
||||
@@ -441,8 +441,8 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
|
||||
}
|
||||
|
||||
/* Called with rcu_read_lock held. */
|
||||
-static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr,
|
||||
- bool *read_only)
|
||||
+static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
|
||||
+ ram_addr_t *ram_addr, bool *read_only)
|
||||
{
|
||||
MemoryRegion *mr;
|
||||
hwaddr xlat;
|
||||
@@ -473,8 +473,17 @@ static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr,
|
||||
return false;
|
||||
}
|
||||
|
||||
- *vaddr = memory_region_get_ram_ptr(mr) + xlat;
|
||||
- *read_only = !writable || mr->readonly;
|
||||
+ if (vaddr) {
|
||||
+ *vaddr = memory_region_get_ram_ptr(mr) + xlat;
|
||||
+ }
|
||||
+
|
||||
+ if (ram_addr) {
|
||||
+ *ram_addr = memory_region_get_ram_addr(mr) + xlat;
|
||||
+ }
|
||||
+
|
||||
+ if (read_only) {
|
||||
+ *read_only = !writable || mr->readonly;
|
||||
+ }
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -484,7 +493,6 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
|
||||
VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
|
||||
VFIOContainer *container = giommu->container;
|
||||
hwaddr iova = iotlb->iova + giommu->iommu_offset;
|
||||
- bool read_only;
|
||||
void *vaddr;
|
||||
int ret;
|
||||
|
||||
@@ -500,7 +508,9 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
|
||||
rcu_read_lock();
|
||||
|
||||
if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
|
||||
- if (!vfio_get_vaddr(iotlb, &vaddr, &read_only)) {
|
||||
+ bool read_only;
|
||||
+
|
||||
+ if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only)) {
|
||||
goto out;
|
||||
}
|
||||
/*
|
||||
@@ -881,11 +891,77 @@ err_out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
+typedef struct {
|
||||
+ IOMMUNotifier n;
|
||||
+ VFIOGuestIOMMU *giommu;
|
||||
+} vfio_giommu_dirty_notifier;
|
||||
+
|
||||
+static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
|
||||
+{
|
||||
+ vfio_giommu_dirty_notifier *gdn = container_of(n,
|
||||
+ vfio_giommu_dirty_notifier, n);
|
||||
+ VFIOGuestIOMMU *giommu = gdn->giommu;
|
||||
+ VFIOContainer *container = giommu->container;
|
||||
+ hwaddr iova = iotlb->iova + giommu->iommu_offset;
|
||||
+ ram_addr_t translated_addr;
|
||||
+
|
||||
+ trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask);
|
||||
+
|
||||
+ if (iotlb->target_as != &address_space_memory) {
|
||||
+ error_report("Wrong target AS \"%s\", only system memory is allowed",
|
||||
+ iotlb->target_as->name ? iotlb->target_as->name : "none");
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ rcu_read_lock();
|
||||
+ if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) {
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = vfio_get_dirty_bitmap(container, iova, iotlb->addr_mask + 1,
|
||||
+ translated_addr);
|
||||
+ if (ret) {
|
||||
+ error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", "
|
||||
+ "0x%"HWADDR_PRIx") = %d (%m)",
|
||||
+ container, iova,
|
||||
+ iotlb->addr_mask + 1, ret);
|
||||
+ }
|
||||
+ }
|
||||
+ rcu_read_unlock();
|
||||
+}
|
||||
+
|
||||
static int vfio_sync_dirty_bitmap(VFIOContainer *container,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
ram_addr_t ram_addr;
|
||||
|
||||
+ if (memory_region_is_iommu(section->mr)) {
|
||||
+ VFIOGuestIOMMU *giommu;
|
||||
+
|
||||
+ QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
|
||||
+ if (MEMORY_REGION(giommu->iommu) == section->mr &&
|
||||
+ giommu->n.start == section->offset_within_region) {
|
||||
+ Int128 llend;
|
||||
+ vfio_giommu_dirty_notifier gdn = { .giommu = giommu };
|
||||
+ int idx = memory_region_iommu_attrs_to_index(giommu->iommu,
|
||||
+ MEMTXATTRS_UNSPECIFIED);
|
||||
+
|
||||
+ llend = int128_add(int128_make64(section->offset_within_region),
|
||||
+ section->size);
|
||||
+ llend = int128_sub(llend, int128_one());
|
||||
+
|
||||
+ iommu_notifier_init(&gdn.n,
|
||||
+ vfio_iommu_map_dirty_notify,
|
||||
+ IOMMU_NOTIFIER_MAP,
|
||||
+ section->offset_within_region,
|
||||
+ int128_get64(llend),
|
||||
+ idx);
|
||||
+ memory_region_iommu_replay(giommu->iommu, &gdn.n);
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
ram_addr = memory_region_get_ram_addr(section->mr) +
|
||||
section->offset_within_region;
|
||||
|
||||
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
|
||||
index 4167f35d64..575ebde6e0 100644
|
||||
--- a/hw/vfio/trace-events
|
||||
+++ b/hw/vfio/trace-events
|
||||
@@ -162,3 +162,4 @@ vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64
|
||||
vfio_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64
|
||||
vfio_load_cleanup(const char *name) " (%s)"
|
||||
vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64
|
||||
+vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64
|
||||
--
|
||||
2.27.0
|
||||
|
||||
186
vfio-Get-migration-capability-flags-for-container.patch
Normal file
186
vfio-Get-migration-capability-flags-for-container.patch
Normal file
@ -0,0 +1,186 @@
|
||||
From fc49c9cbf2deba53370f48ad9db2adc5f6ceb3ba Mon Sep 17 00:00:00 2001
|
||||
From: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Date: Mon, 26 Oct 2020 15:06:21 +0530
|
||||
Subject: [PATCH] vfio: Get migration capability flags for container
|
||||
|
||||
Added helper functions to get IOMMU info capability chain.
|
||||
Added function to get migration capability information from that
|
||||
capability chain for IOMMU container.
|
||||
|
||||
Similar change was proposed earlier:
|
||||
https://lists.gnu.org/archive/html/qemu-devel/2018-05/msg03759.html
|
||||
|
||||
Disable migration for devices if IOMMU module doesn't support migration
|
||||
capability.
|
||||
|
||||
Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Cc: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
|
||||
Cc: Eric Auger <eric.auger@redhat.com>
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
---
|
||||
hw/vfio/common.c | 90 +++++++++++++++++++++++++++++++----
|
||||
hw/vfio/migration.c | 7 ++-
|
||||
include/hw/vfio/vfio-common.h | 3 ++
|
||||
3 files changed, 91 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||
index 4c32b1bb99..35168b8f3e 100644
|
||||
--- a/hw/vfio/common.c
|
||||
+++ b/hw/vfio/common.c
|
||||
@@ -1210,6 +1210,75 @@ static int vfio_init_container(VFIOContainer *container, int group_fd,
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int vfio_get_iommu_info(VFIOContainer *container,
|
||||
+ struct vfio_iommu_type1_info **info)
|
||||
+{
|
||||
+
|
||||
+ size_t argsz = sizeof(struct vfio_iommu_type1_info);
|
||||
+
|
||||
+ *info = g_new0(struct vfio_iommu_type1_info, 1);
|
||||
+again:
|
||||
+ (*info)->argsz = argsz;
|
||||
+
|
||||
+ if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) {
|
||||
+ g_free(*info);
|
||||
+ *info = NULL;
|
||||
+ return -errno;
|
||||
+ }
|
||||
+
|
||||
+ if (((*info)->argsz > argsz)) {
|
||||
+ argsz = (*info)->argsz;
|
||||
+ *info = g_realloc(*info, argsz);
|
||||
+ goto again;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static struct vfio_info_cap_header *
|
||||
+vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id)
|
||||
+{
|
||||
+ struct vfio_info_cap_header *hdr;
|
||||
+ void *ptr = info;
|
||||
+
|
||||
+ if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) {
|
||||
+ return NULL;
|
||||
+ }
|
||||
+
|
||||
+ for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
|
||||
+ if (hdr->id == id) {
|
||||
+ return hdr;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return NULL;
|
||||
+}
|
||||
+
|
||||
+static void vfio_get_iommu_info_migration(VFIOContainer *container,
|
||||
+ struct vfio_iommu_type1_info *info)
|
||||
+{
|
||||
+ struct vfio_info_cap_header *hdr;
|
||||
+ struct vfio_iommu_type1_info_cap_migration *cap_mig;
|
||||
+
|
||||
+ hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION);
|
||||
+ if (!hdr) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ cap_mig = container_of(hdr, struct vfio_iommu_type1_info_cap_migration,
|
||||
+ header);
|
||||
+
|
||||
+ /*
|
||||
+ * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of
|
||||
+ * TARGET_PAGE_SIZE to mark those dirty.
|
||||
+ */
|
||||
+ if (cap_mig->pgsize_bitmap & TARGET_PAGE_SIZE) {
|
||||
+ container->dirty_pages_supported = true;
|
||||
+ container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size;
|
||||
+ container->dirty_pgsizes = cap_mig->pgsize_bitmap;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||
Error **errp)
|
||||
{
|
||||
@@ -1273,6 +1342,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||
container = g_malloc0(sizeof(*container));
|
||||
container->space = space;
|
||||
container->fd = fd;
|
||||
+ container->dirty_pages_supported = false;
|
||||
QLIST_INIT(&container->giommu_list);
|
||||
QLIST_INIT(&container->hostwin_list);
|
||||
|
||||
@@ -1285,7 +1355,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||
case VFIO_TYPE1v2_IOMMU:
|
||||
case VFIO_TYPE1_IOMMU:
|
||||
{
|
||||
- struct vfio_iommu_type1_info info;
|
||||
+ struct vfio_iommu_type1_info *info;
|
||||
|
||||
/*
|
||||
* FIXME: This assumes that a Type1 IOMMU can map any 64-bit
|
||||
@@ -1294,15 +1364,19 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||
* existing Type1 IOMMUs generally support any IOVA we're
|
||||
* going to actually try in practice.
|
||||
*/
|
||||
- info.argsz = sizeof(info);
|
||||
- ret = ioctl(fd, VFIO_IOMMU_GET_INFO, &info);
|
||||
- /* Ignore errors */
|
||||
- if (ret || !(info.flags & VFIO_IOMMU_INFO_PGSIZES)) {
|
||||
+ ret = vfio_get_iommu_info(container, &info);
|
||||
+
|
||||
+ if (ret || !(info->flags & VFIO_IOMMU_INFO_PGSIZES)) {
|
||||
/* Assume 4k IOVA page size */
|
||||
- info.iova_pgsizes = 4096;
|
||||
+ info->iova_pgsizes = 4096;
|
||||
}
|
||||
- vfio_host_win_add(container, 0, (hwaddr)-1, info.iova_pgsizes);
|
||||
- container->pgsizes = info.iova_pgsizes;
|
||||
+ vfio_host_win_add(container, 0, (hwaddr)-1, info->iova_pgsizes);
|
||||
+ container->pgsizes = info->iova_pgsizes;
|
||||
+
|
||||
+ if (!ret) {
|
||||
+ vfio_get_iommu_info_migration(container, info);
|
||||
+ }
|
||||
+ g_free(info);
|
||||
break;
|
||||
}
|
||||
case VFIO_SPAPR_TCE_v2_IOMMU:
|
||||
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
|
||||
index 954c064435..0d2bd9e5cd 100644
|
||||
--- a/hw/vfio/migration.c
|
||||
+++ b/hw/vfio/migration.c
|
||||
@@ -832,9 +832,14 @@ err:
|
||||
|
||||
int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
|
||||
{
|
||||
+ VFIOContainer *container = vbasedev->group->container;
|
||||
struct vfio_region_info *info = NULL;
|
||||
Error *local_err = NULL;
|
||||
- int ret;
|
||||
+ int ret = -ENOTSUP;
|
||||
+
|
||||
+ if (!container->dirty_pages_supported) {
|
||||
+ goto add_blocker;
|
||||
+ }
|
||||
|
||||
ret = vfio_get_dev_region_info(vbasedev, VFIO_REGION_TYPE_MIGRATION,
|
||||
VFIO_REGION_SUBTYPE_MIGRATION, &info);
|
||||
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||
index c825524606..8fd0212264 100644
|
||||
--- a/include/hw/vfio/vfio-common.h
|
||||
+++ b/include/hw/vfio/vfio-common.h
|
||||
@@ -84,6 +84,9 @@ typedef struct VFIOContainer {
|
||||
unsigned iommu_type;
|
||||
int error;
|
||||
bool initialized;
|
||||
+ bool dirty_pages_supported;
|
||||
+ uint64_t dirty_pgsizes;
|
||||
+ uint64_t max_dirty_bitmap_size;
|
||||
unsigned long pgsizes;
|
||||
QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
|
||||
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
|
||||
--
|
||||
2.27.0
|
||||
|
||||
73
vfio-Make-vfio-pci-device-migration-capable.patch
Normal file
73
vfio-Make-vfio-pci-device-migration-capable.patch
Normal file
@ -0,0 +1,73 @@
|
||||
From b20bf027d44809dd6c6376cf0b77e5c5b2057cba Mon Sep 17 00:00:00 2001
|
||||
From: Jens Freimann <jfreimann@redhat.com>
|
||||
Date: Tue, 29 Oct 2019 12:49:05 +0100
|
||||
Subject: [PATCH] vfio: Make vfio-pci device migration capable
|
||||
|
||||
If the device is not a failover primary device, call
|
||||
vfio_migration_probe() and vfio_migration_finalize() to enable
|
||||
migration support for those devices that support it and, respectively, to
|
||||
tear it down again.
|
||||
Removed migration blocker from VFIO PCI device specific structure and use
|
||||
migration blocker from generic structure of VFIO device.
|
||||
|
||||
Note: Since the current version doesn't add the failover feature for assigned
|
||||
PCI devices, just remove the failover related code in the original patch for
|
||||
simplicity.
|
||||
|
||||
Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Reviewed-by: Neo Jia <cjia@nvidia.com>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
Signed-off-by: Shenming Lu <lushenming@huawei.com>
|
||||
---
|
||||
hw/vfio/pci.c | 12 ++++++------
|
||||
1 file changed, 6 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
|
||||
index b9fae3ad28..a637c35e7a 100644
|
||||
--- a/hw/vfio/pci.c
|
||||
+++ b/hw/vfio/pci.c
|
||||
@@ -3049,6 +3049,11 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
|
||||
}
|
||||
}
|
||||
|
||||
+ ret = vfio_migration_probe(&vdev->vbasedev, errp);
|
||||
+ if (ret) {
|
||||
+ error_report("%s: Migration disabled", vdev->vbasedev.name);
|
||||
+ }
|
||||
+
|
||||
vfio_register_err_notifier(vdev);
|
||||
vfio_register_req_notifier(vdev);
|
||||
vfio_setup_resetfn_quirk(vdev);
|
||||
@@ -3096,6 +3101,7 @@ static void vfio_exitfn(PCIDevice *pdev)
|
||||
}
|
||||
vfio_teardown_msi(vdev);
|
||||
vfio_bars_exit(vdev);
|
||||
+ vfio_migration_finalize(&vdev->vbasedev);
|
||||
}
|
||||
|
||||
static void vfio_pci_reset(DeviceState *dev)
|
||||
@@ -3204,11 +3210,6 @@ static Property vfio_pci_dev_properties[] = {
|
||||
DEFINE_PROP_END_OF_LIST(),
|
||||
};
|
||||
|
||||
-static const VMStateDescription vfio_pci_vmstate = {
|
||||
- .name = "vfio-pci",
|
||||
- .unmigratable = 1,
|
||||
-};
|
||||
-
|
||||
static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
|
||||
{
|
||||
DeviceClass *dc = DEVICE_CLASS(klass);
|
||||
@@ -3216,7 +3217,6 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
|
||||
|
||||
dc->reset = vfio_pci_reset;
|
||||
dc->props = vfio_pci_dev_properties;
|
||||
- dc->vmsd = &vfio_pci_vmstate;
|
||||
dc->desc = "VFIO-based PCI device assignment";
|
||||
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
|
||||
pdc->realize = vfio_realize;
|
||||
--
|
||||
2.27.0
|
||||
|
||||
183
vfio-Register-SaveVMHandlers-for-VFIO-device.patch
Normal file
183
vfio-Register-SaveVMHandlers-for-VFIO-device.patch
Normal file
@ -0,0 +1,183 @@
|
||||
From cd5b58f2ba20e59f2c29d955b8bbd7f5016030b7 Mon Sep 17 00:00:00 2001
|
||||
From: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Date: Mon, 26 Oct 2020 15:06:17 +0530
|
||||
Subject: [PATCH] vfio: Register SaveVMHandlers for VFIO device
|
||||
|
||||
Define flags to be used as delimiter in migration stream for VFIO devices.
|
||||
Added .save_setup and .save_cleanup functions. Map & unmap migration
|
||||
region from these functions at source during saving or pre-copy phase.
|
||||
|
||||
Set VFIO device state depending on VM's state. During live migration, VM is
|
||||
running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO
|
||||
device. During save-restore, VM is paused, _SAVING state is set for VFIO device.
|
||||
|
||||
Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
|
||||
Reviewed-by: Neo Jia <cjia@nvidia.com>
|
||||
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
|
||||
Reviewed-by: Yan Zhao <yan.y.zhao@intel.com>
|
||||
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
||||
---
|
||||
hw/vfio/migration.c | 102 +++++++++++++++++++++++++++++++++++++++++++
|
||||
hw/vfio/trace-events | 2 +
|
||||
2 files changed, 104 insertions(+)
|
||||
|
||||
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
|
||||
index 0c6c9b655f..405228fc5a 100644
|
||||
--- a/hw/vfio/migration.c
|
||||
+++ b/hw/vfio/migration.c
|
||||
@@ -8,12 +8,15 @@
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
+#include "qemu/main-loop.h"
|
||||
+#include "qemu/cutils.h"
|
||||
#include <linux/vfio.h>
|
||||
|
||||
#include "sysemu/sysemu.h"
|
||||
#include "hw/vfio/vfio-common.h"
|
||||
#include "cpu.h"
|
||||
#include "migration/migration.h"
|
||||
+#include "migration/vmstate.h"
|
||||
#include "migration/qemu-file.h"
|
||||
#include "migration/register.h"
|
||||
#include "migration/blocker.h"
|
||||
@@ -25,6 +28,22 @@
|
||||
#include "trace.h"
|
||||
#include "hw/hw.h"
|
||||
|
||||
+/*
|
||||
+ * Flags to be used as unique delimiters for VFIO devices in the migration
|
||||
+ * stream. These flags are composed as:
|
||||
+ * 0xffffffff => MSB 32-bit all 1s
|
||||
+ * 0xef10 => Magic ID, represents emulated (virtual) function IO
|
||||
+ * 0x0000 => 16-bits reserved for flags
|
||||
+ *
|
||||
+ * The beginning of state information is marked by _DEV_CONFIG_STATE,
|
||||
+ * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a
|
||||
+ * certain state information is marked by _END_OF_STATE.
|
||||
+ */
|
||||
+#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL)
|
||||
+#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL)
|
||||
+#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL)
|
||||
+#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL)
|
||||
+
|
||||
static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
|
||||
off_t off, bool iswrite)
|
||||
{
|
||||
@@ -129,6 +148,75 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static void vfio_migration_cleanup(VFIODevice *vbasedev)
|
||||
+{
|
||||
+ VFIOMigration *migration = vbasedev->migration;
|
||||
+
|
||||
+ if (migration->region.mmaps) {
|
||||
+ vfio_region_unmap(&migration->region);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* ---------------------------------------------------------------------- */
|
||||
+
|
||||
+static int vfio_save_setup(QEMUFile *f, void *opaque)
|
||||
+{
|
||||
+ VFIODevice *vbasedev = opaque;
|
||||
+ VFIOMigration *migration = vbasedev->migration;
|
||||
+ int ret;
|
||||
+
|
||||
+ trace_vfio_save_setup(vbasedev->name);
|
||||
+
|
||||
+ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
|
||||
+
|
||||
+ if (migration->region.mmaps) {
|
||||
+ /*
|
||||
+ * Calling vfio_region_mmap() from migration thread. Memory API called
|
||||
+ * from this function require locking the iothread when called from
|
||||
+ * outside the main loop thread.
|
||||
+ */
|
||||
+ qemu_mutex_lock_iothread();
|
||||
+ ret = vfio_region_mmap(&migration->region);
|
||||
+ qemu_mutex_unlock_iothread();
|
||||
+ if (ret) {
|
||||
+ error_report("%s: Failed to mmap VFIO migration region: %s",
|
||||
+ vbasedev->name, strerror(-ret));
|
||||
+ error_report("%s: Falling back to slow path", vbasedev->name);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
|
||||
+ VFIO_DEVICE_STATE_SAVING);
|
||||
+ if (ret) {
|
||||
+ error_report("%s: Failed to set state SAVING", vbasedev->name);
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
|
||||
+
|
||||
+ ret = qemu_file_get_error(f);
|
||||
+ if (ret) {
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static void vfio_save_cleanup(void *opaque)
|
||||
+{
|
||||
+ VFIODevice *vbasedev = opaque;
|
||||
+
|
||||
+ vfio_migration_cleanup(vbasedev);
|
||||
+ trace_vfio_save_cleanup(vbasedev->name);
|
||||
+}
|
||||
+
|
||||
+static SaveVMHandlers savevm_vfio_handlers = {
|
||||
+ .save_setup = vfio_save_setup,
|
||||
+ .save_cleanup = vfio_save_cleanup,
|
||||
+};
|
||||
+
|
||||
+/* ---------------------------------------------------------------------- */
|
||||
+
|
||||
static void vfio_vmstate_change(void *opaque, int running, RunState state)
|
||||
{
|
||||
VFIODevice *vbasedev = opaque;
|
||||
@@ -215,6 +303,8 @@ static int vfio_migration_init(VFIODevice *vbasedev,
|
||||
int ret;
|
||||
Object *obj;
|
||||
VFIOMigration *migration;
|
||||
+ char id[256] = "";
|
||||
+ g_autofree char *path = NULL, *oid = NULL;
|
||||
|
||||
if (!vbasedev->ops->vfio_get_object) {
|
||||
return -EINVAL;
|
||||
@@ -244,6 +334,18 @@ static int vfio_migration_init(VFIODevice *vbasedev,
|
||||
|
||||
migration = vbasedev->migration;
|
||||
migration->vbasedev = vbasedev;
|
||||
+
|
||||
+ oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj)));
|
||||
+ if (oid) {
|
||||
+ path = g_strdup_printf("%s/vfio", oid);
|
||||
+ } else {
|
||||
+ path = g_strdup("vfio");
|
||||
+ }
|
||||
+ strpadcpy(id, sizeof(id), path, '\0');
|
||||
+
|
||||
+ register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
|
||||
+ vbasedev);
|
||||
+
|
||||
migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change,
|
||||
vbasedev);
|
||||
migration->migration_state.notify = vfio_migration_state_notifier;
|
||||
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
|
||||
index bd3d47b005..86c18def01 100644
|
||||
--- a/hw/vfio/trace-events
|
||||
+++ b/hw/vfio/trace-events
|
||||
@@ -149,3 +149,5 @@ vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d"
|
||||
vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d"
|
||||
vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d"
|
||||
vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s"
|
||||
+vfio_save_setup(const char *name) " (%s)"
|
||||
+vfio_save_cleanup(const char *name) " (%s)"
|
||||
--
|
||||
2.27.0
|
||||
|
||||
210
vmstate-add-qom-interface-to-get-id.patch
Normal file
210
vmstate-add-qom-interface-to-get-id.patch
Normal file
@ -0,0 +1,210 @@
|
||||
From d771fca664e40c7d7ec5dfa2c656a282bff705b7 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= <marcandre.lureau@redhat.com>
|
||||
Date: Wed, 28 Aug 2019 16:00:19 +0400
|
||||
Subject: [PATCH] vmstate: add qom interface to get id
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Add an interface to get the instance id, instead of depending on
|
||||
Device and qdev_get_dev_path().
|
||||
|
||||
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
---
|
||||
MAINTAINERS | 2 ++
|
||||
hw/core/Makefile.objs | 1 +
|
||||
hw/core/qdev.c | 14 +++++++++++++
|
||||
hw/core/vmstate-if.c | 23 +++++++++++++++++++++
|
||||
include/hw/vmstate-if.h | 40 ++++++++++++++++++++++++++++++++++++
|
||||
include/migration/register.h | 2 ++
|
||||
include/migration/vmstate.h | 2 ++
|
||||
tests/Makefile.include | 1 +
|
||||
8 files changed, 85 insertions(+)
|
||||
create mode 100644 hw/core/vmstate-if.c
|
||||
create mode 100644 include/hw/vmstate-if.h
|
||||
|
||||
diff --git a/MAINTAINERS b/MAINTAINERS
|
||||
index d6de200453..e2d74d7ec3 100644
|
||||
--- a/MAINTAINERS
|
||||
+++ b/MAINTAINERS
|
||||
@@ -2135,6 +2135,8 @@ Migration
|
||||
M: Juan Quintela <quintela@redhat.com>
|
||||
M: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
S: Maintained
|
||||
+F: hw/core/vmstate-if.c
|
||||
+F: include/hw/vmstate-if.h
|
||||
F: include/migration/
|
||||
F: migration/
|
||||
F: scripts/vmstate-static-checker.py
|
||||
diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs
|
||||
index f8481d959f..54c51583d8 100644
|
||||
--- a/hw/core/Makefile.objs
|
||||
+++ b/hw/core/Makefile.objs
|
||||
@@ -8,6 +8,7 @@ common-obj-y += irq.o
|
||||
common-obj-y += hotplug.o
|
||||
common-obj-$(CONFIG_SOFTMMU) += nmi.o
|
||||
common-obj-$(CONFIG_SOFTMMU) += vm-change-state-handler.o
|
||||
+common-obj-y += vmstate-if.o
|
||||
|
||||
common-obj-$(CONFIG_EMPTY_SLOT) += empty_slot.o
|
||||
common-obj-$(CONFIG_XILINX_AXI) += stream.o
|
||||
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
|
||||
index 4b32f2f46d..13931b1117 100644
|
||||
--- a/hw/core/qdev.c
|
||||
+++ b/hw/core/qdev.c
|
||||
@@ -1048,9 +1048,18 @@ static void device_unparent(Object *obj)
|
||||
}
|
||||
}
|
||||
|
||||
+static char *
|
||||
+device_vmstate_if_get_id(VMStateIf *obj)
|
||||
+{
|
||||
+ DeviceState *dev = DEVICE(obj);
|
||||
+
|
||||
+ return qdev_get_dev_path(dev);
|
||||
+}
|
||||
+
|
||||
static void device_class_init(ObjectClass *class, void *data)
|
||||
{
|
||||
DeviceClass *dc = DEVICE_CLASS(class);
|
||||
+ VMStateIfClass *vc = VMSTATE_IF_CLASS(class);
|
||||
|
||||
class->unparent = device_unparent;
|
||||
|
||||
@@ -1062,6 +1071,7 @@ static void device_class_init(ObjectClass *class, void *data)
|
||||
*/
|
||||
dc->hotpluggable = true;
|
||||
dc->user_creatable = true;
|
||||
+ vc->get_id = device_vmstate_if_get_id;
|
||||
}
|
||||
|
||||
void device_class_set_parent_reset(DeviceClass *dc,
|
||||
@@ -1119,6 +1129,10 @@ static const TypeInfo device_type_info = {
|
||||
.class_init = device_class_init,
|
||||
.abstract = true,
|
||||
.class_size = sizeof(DeviceClass),
|
||||
+ .interfaces = (InterfaceInfo[]) {
|
||||
+ { TYPE_VMSTATE_IF },
|
||||
+ { }
|
||||
+ }
|
||||
};
|
||||
|
||||
static void qdev_register_types(void)
|
||||
diff --git a/hw/core/vmstate-if.c b/hw/core/vmstate-if.c
|
||||
new file mode 100644
|
||||
index 0000000000..bf453620fe
|
||||
--- /dev/null
|
||||
+++ b/hw/core/vmstate-if.c
|
||||
@@ -0,0 +1,23 @@
|
||||
+/*
|
||||
+ * VMState interface
|
||||
+ *
|
||||
+ * Copyright (c) 2009-2019 Red Hat Inc
|
||||
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
+ * See the COPYING file in the top-level directory.
|
||||
+ */
|
||||
+
|
||||
+#include "qemu/osdep.h"
|
||||
+#include "hw/vmstate-if.h"
|
||||
+
|
||||
+static const TypeInfo vmstate_if_info = {
|
||||
+ .name = TYPE_VMSTATE_IF,
|
||||
+ .parent = TYPE_INTERFACE,
|
||||
+ .class_size = sizeof(VMStateIfClass),
|
||||
+};
|
||||
+
|
||||
+static void vmstate_register_types(void)
|
||||
+{
|
||||
+ type_register_static(&vmstate_if_info);
|
||||
+}
|
||||
+
|
||||
+type_init(vmstate_register_types);
|
||||
diff --git a/include/hw/vmstate-if.h b/include/hw/vmstate-if.h
|
||||
new file mode 100644
|
||||
index 0000000000..8ff7f0f292
|
||||
--- /dev/null
|
||||
+++ b/include/hw/vmstate-if.h
|
||||
@@ -0,0 +1,40 @@
|
||||
+/*
|
||||
+ * VMState interface
|
||||
+ *
|
||||
+ * Copyright (c) 2009-2019 Red Hat Inc
|
||||
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
+ * See the COPYING file in the top-level directory.
|
||||
+ */
|
||||
+
|
||||
+#ifndef VMSTATE_IF_H
|
||||
+#define VMSTATE_IF_H
|
||||
+
|
||||
+#include "qom/object.h"
|
||||
+
|
||||
+#define TYPE_VMSTATE_IF "vmstate-if"
|
||||
+
|
||||
+#define VMSTATE_IF_CLASS(klass) \
|
||||
+ OBJECT_CLASS_CHECK(VMStateIfClass, (klass), TYPE_VMSTATE_IF)
|
||||
+#define VMSTATE_IF_GET_CLASS(obj) \
|
||||
+ OBJECT_GET_CLASS(VMStateIfClass, (obj), TYPE_VMSTATE_IF)
|
||||
+#define VMSTATE_IF(obj) \
|
||||
+ INTERFACE_CHECK(VMStateIf, (obj), TYPE_VMSTATE_IF)
|
||||
+
|
||||
+typedef struct VMStateIf VMStateIf;
|
||||
+
|
||||
+typedef struct VMStateIfClass {
|
||||
+ InterfaceClass parent_class;
|
||||
+
|
||||
+ char * (*get_id)(VMStateIf *obj);
|
||||
+} VMStateIfClass;
|
||||
+
|
||||
+static inline char *vmstate_if_get_id(VMStateIf *vmif)
|
||||
+{
|
||||
+ if (!vmif) {
|
||||
+ return NULL;
|
||||
+ }
|
||||
+
|
||||
+ return VMSTATE_IF_GET_CLASS(vmif)->get_id(vmif);
|
||||
+}
|
||||
+
|
||||
+#endif /* VMSTATE_IF_H */
|
||||
diff --git a/include/migration/register.h b/include/migration/register.h
|
||||
index f3ba10b6ef..158130c8c4 100644
|
||||
--- a/include/migration/register.h
|
||||
+++ b/include/migration/register.h
|
||||
@@ -14,6 +14,8 @@
|
||||
#ifndef MIGRATION_REGISTER_H
|
||||
#define MIGRATION_REGISTER_H
|
||||
|
||||
+#include "hw/vmstate-if.h"
|
||||
+
|
||||
typedef struct SaveVMHandlers {
|
||||
/* This runs inside the iothread lock. */
|
||||
SaveStateHandler *save_state;
|
||||
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
|
||||
index 8abd2e3b80..8cc1e19fd9 100644
|
||||
--- a/include/migration/vmstate.h
|
||||
+++ b/include/migration/vmstate.h
|
||||
@@ -27,6 +27,8 @@
|
||||
#ifndef QEMU_VMSTATE_H
|
||||
#define QEMU_VMSTATE_H
|
||||
|
||||
+#include "hw/vmstate-if.h"
|
||||
+
|
||||
typedef struct VMStateInfo VMStateInfo;
|
||||
typedef struct VMStateDescription VMStateDescription;
|
||||
typedef struct VMStateField VMStateField;
|
||||
diff --git a/tests/Makefile.include b/tests/Makefile.include
|
||||
index 3be60ab999..1c7772a230 100644
|
||||
--- a/tests/Makefile.include
|
||||
+++ b/tests/Makefile.include
|
||||
@@ -566,6 +566,7 @@ tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \
|
||||
hw/core/irq.o \
|
||||
hw/core/fw-path-provider.o \
|
||||
hw/core/reset.o \
|
||||
+ hw/core/vmstate-if.o \
|
||||
$(test-qapi-obj-y)
|
||||
tests/test-vmstate$(EXESUF): tests/test-vmstate.o \
|
||||
migration/vmstate.o migration/vmstate-types.o migration/qemu-file.o \
|
||||
--
|
||||
2.27.0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user