!673 Qemu update to version 6.2.0-55
From: @yezengruan Reviewed-by: @aven6 Signed-off-by: @aven6
This commit is contained in:
commit
4b7a282fcc
113
KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch
Normal file
113
KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch
Normal file
@ -0,0 +1,113 @@
|
||||
From 49cb3c9f3cc3a567ce2e6159bf27328c64b6601d Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Wed, 23 Mar 2022 12:33:25 +0100
|
||||
Subject: [PATCH 10/10] KVM: x86: workaround invalid CPUID[0xD,9] info on some
|
||||
AMD processors
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
from mainline-v7.0.0-rc2
|
||||
commit 58f7db26f21c690cf9a669c314cfd7371506084a
|
||||
category: feature
|
||||
feature: SPR AMX support for Qemu
|
||||
bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB
|
||||
|
||||
Intel-SIG: commit 58f7db26f21c ("KVM: x86: workaround invalid CPUID[0xD,9] info
|
||||
on some AMD processors")
|
||||
|
||||
----------------------------------------------------------------
|
||||
|
||||
KVM: x86: workaround invalid CPUID[0xD,9] info on some AMD processors
|
||||
|
||||
Some AMD processors expose the PKRU extended save state even if they do not have
|
||||
the related PKU feature in CPUID. Worse, when they do they report a size of
|
||||
64, whereas the expected size of the PKRU extended save state is 8, therefore
|
||||
the esa->size == eax assertion does not hold.
|
||||
|
||||
The state is already ignored by KVM_GET_SUPPORTED_CPUID because it
|
||||
was not enabled in the host XCR0. However, QEMU kvm_cpu_xsave_init()
|
||||
runs before QEMU invokes arch_prctl() to enable dynamically-enabled
|
||||
save states such as XTILEDATA, and KVM_GET_SUPPORTED_CPUID hides save
|
||||
states that have yet to be enabled. Therefore, kvm_cpu_xsave_init()
|
||||
needs to consult the host CPUID instead of KVM_GET_SUPPORTED_CPUID,
|
||||
and dies with an assertion failure.
|
||||
|
||||
When setting up the ExtSaveArea array to match the host, ignore features that
|
||||
KVM does not report as supported. This will cause QEMU to skip the incorrect
|
||||
CPUID leaf instead of tripping the assertion.
|
||||
|
||||
Closes: https://gitlab.com/qemu-project/qemu/-/issues/916
|
||||
Reported-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Analyzed-by: Yang Zhong <yang.zhong@intel.com>
|
||||
Reported-by: Peter Krempa <pkrempa@redhat.com>
|
||||
Tested-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
|
||||
---
|
||||
target/i386/cpu.c | 4 ++--
|
||||
target/i386/cpu.h | 2 ++
|
||||
target/i386/kvm/kvm-cpu.c | 19 ++++++++++++-------
|
||||
3 files changed, 16 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
|
||||
index 1bc03d3eef..551b47ab1e 100644
|
||||
--- a/target/i386/cpu.c
|
||||
+++ b/target/i386/cpu.c
|
||||
@@ -4973,8 +4973,8 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp)
|
||||
return cpu_list;
|
||||
}
|
||||
|
||||
-static uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
|
||||
- bool migratable_only)
|
||||
+uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
|
||||
+ bool migratable_only)
|
||||
{
|
||||
FeatureWordInfo *wi = &feature_word_info[w];
|
||||
uint64_t r = 0;
|
||||
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
|
||||
index eaa99c302f..290f1beaea 100644
|
||||
--- a/target/i386/cpu.h
|
||||
+++ b/target/i386/cpu.h
|
||||
@@ -605,6 +605,8 @@ typedef enum FeatureWord {
|
||||
} FeatureWord;
|
||||
|
||||
typedef uint64_t FeatureWordArray[FEATURE_WORDS];
|
||||
+uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
|
||||
+ bool migratable_only);
|
||||
|
||||
/* cpuid_features bits */
|
||||
#define CPUID_FP87 (1U << 0)
|
||||
diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
|
||||
index a35a1bf9fe..5eb955ce9a 100644
|
||||
--- a/target/i386/kvm/kvm-cpu.c
|
||||
+++ b/target/i386/kvm/kvm-cpu.c
|
||||
@@ -99,13 +99,18 @@ static void kvm_cpu_xsave_init(void)
|
||||
for (i = XSTATE_SSE_BIT + 1; i < XSAVE_STATE_AREA_COUNT; i++) {
|
||||
ExtSaveArea *esa = &x86_ext_save_areas[i];
|
||||
|
||||
- if (esa->size) {
|
||||
- host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx);
|
||||
- if (eax != 0) {
|
||||
- assert(esa->size == eax);
|
||||
- esa->offset = ebx;
|
||||
- esa->ecx = ecx;
|
||||
- }
|
||||
+ if (!esa->size) {
|
||||
+ continue;
|
||||
+ }
|
||||
+ if ((x86_cpu_get_supported_feature_word(esa->feature, false) & esa->bits)
|
||||
+ != esa->bits) {
|
||||
+ continue;
|
||||
+ }
|
||||
+ host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx);
|
||||
+ if (eax != 0) {
|
||||
+ assert(esa->size == eax);
|
||||
+ esa->offset = ebx;
|
||||
+ esa->ecx = ecx;
|
||||
}
|
||||
}
|
||||
}
|
||||
--
|
||||
2.27.0
|
||||
|
||||
25
Remove-the-unused-local-variable-records.patch
Normal file
25
Remove-the-unused-local-variable-records.patch
Normal file
@ -0,0 +1,25 @@
|
||||
From 7b859a86cbdde8bf17619c43a6d4ae687a20f003 Mon Sep 17 00:00:00 2001
|
||||
From: dinglimin <dinglimin@cmss.chinamobile.com>
|
||||
Date: Wed, 29 Jun 2022 16:26:17 +0800
|
||||
Subject: [PATCH] Remove the unused local variable "records".
|
||||
|
||||
Signed-off-by: dinglimin <dinglimin@cmss.chinamobile.com>
|
||||
---
|
||||
tests/migration/guestperf/engine.py | 1 -
|
||||
1 file changed, 1 deletion(-)
|
||||
|
||||
diff --git a/tests/migration/guestperf/engine.py b/tests/migration/guestperf/engine.py
|
||||
index 87a6ab2009..59fca2c70b 100644
|
||||
--- a/tests/migration/guestperf/engine.py
|
||||
+++ b/tests/migration/guestperf/engine.py
|
||||
@@ -65,7 +65,6 @@ def _vcpu_timing(self, pid, tid_list):
|
||||
return records
|
||||
|
||||
def _cpu_timing(self, pid):
|
||||
- records = []
|
||||
now = time.time()
|
||||
|
||||
jiffies_per_sec = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
|
||||
--
|
||||
2.27.0
|
||||
|
||||
25
Remove-this-redundant-return.patch
Normal file
25
Remove-this-redundant-return.patch
Normal file
@ -0,0 +1,25 @@
|
||||
From e7ef56975af8553690afb16f32fe74d62762b853 Mon Sep 17 00:00:00 2001
|
||||
From: dinglimin <dinglimin@cmss.chinamobile.com>
|
||||
Date: Wed, 29 Jun 2022 14:02:59 +0800
|
||||
Subject: [PATCH] Remove this redundant return.
|
||||
|
||||
Signed-off-by: dinglimin <dinglimin@cmss.chinamobile.com>
|
||||
---
|
||||
scripts/vmstate-static-checker.py | 1 -
|
||||
1 file changed, 1 deletion(-)
|
||||
|
||||
diff --git a/scripts/vmstate-static-checker.py b/scripts/vmstate-static-checker.py
|
||||
index 539ead62b4..6838bf7e7c 100755
|
||||
--- a/scripts/vmstate-static-checker.py
|
||||
+++ b/scripts/vmstate-static-checker.py
|
||||
@@ -367,7 +367,6 @@ def check_machine_type(s, d):
|
||||
if s["Name"] != d["Name"]:
|
||||
print("Warning: checking incompatible machine types:", end=' ')
|
||||
print("\"" + s["Name"] + "\", \"" + d["Name"] + "\"")
|
||||
- return
|
||||
|
||||
|
||||
def main():
|
||||
--
|
||||
2.27.0
|
||||
|
||||
66
accel-kvm-kvm-all-Introduce-kvm_dirty_ring_size-func.patch
Normal file
66
accel-kvm-kvm-all-Introduce-kvm_dirty_ring_size-func.patch
Normal file
@ -0,0 +1,66 @@
|
||||
From 85583352f3bc28badd4cb336517f6a4eb440d5b0 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?=
|
||||
<huangy81@chinatelecom.cn>
|
||||
Date: Sun, 26 Jun 2022 01:38:34 +0800
|
||||
Subject: [PATCH 2/3] accel/kvm/kvm-all: Introduce kvm_dirty_ring_size function
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Introduce kvm_dirty_ring_size util function to help calculate
|
||||
dirty ring full time.
|
||||
|
||||
Signed-off-by: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
|
||||
Acked-by: Peter Xu <peterx@redhat.com>
|
||||
Message-Id: <f9ce1f550bfc0e3a1f711e17b1dbc8f701700e56.1656177590.git.huangy81@chinatelecom.cn>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
---
|
||||
accel/kvm/kvm-all.c | 5 +++++
|
||||
accel/stubs/kvm-stub.c | 5 +++++
|
||||
include/sysemu/kvm.h | 2 ++
|
||||
3 files changed, 12 insertions(+)
|
||||
|
||||
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||
index 3bc6eb6294..d0c4310507 100644
|
||||
--- a/accel/kvm/kvm-all.c
|
||||
+++ b/accel/kvm/kvm-all.c
|
||||
@@ -2332,6 +2332,11 @@ bool kvm_dirty_ring_enabled(void)
|
||||
return kvm_state->kvm_dirty_ring_size ? true : false;
|
||||
}
|
||||
|
||||
+uint32_t kvm_dirty_ring_size(void)
|
||||
+{
|
||||
+ return kvm_state->kvm_dirty_ring_size;
|
||||
+}
|
||||
+
|
||||
static int kvm_init(MachineState *ms)
|
||||
{
|
||||
MachineClass *mc = MACHINE_GET_CLASS(ms);
|
||||
diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c
|
||||
index 5319573e00..1128cb2928 100644
|
||||
--- a/accel/stubs/kvm-stub.c
|
||||
+++ b/accel/stubs/kvm-stub.c
|
||||
@@ -152,4 +152,9 @@ bool kvm_dirty_ring_enabled(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
+
|
||||
+uint32_t kvm_dirty_ring_size(void)
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
#endif
|
||||
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
|
||||
index 2623775c27..19c5c8402a 100644
|
||||
--- a/include/sysemu/kvm.h
|
||||
+++ b/include/sysemu/kvm.h
|
||||
@@ -549,4 +549,6 @@ bool kvm_cpu_check_are_resettable(void);
|
||||
bool kvm_arch_cpu_check_are_resettable(void);
|
||||
|
||||
bool kvm_dirty_ring_enabled(void);
|
||||
+
|
||||
+uint32_t kvm_dirty_ring_size(void);
|
||||
#endif
|
||||
--
|
||||
2.27.0
|
||||
|
||||
106
accel-kvm-kvm-all-Refactor-per-vcpu-dirty-ring-reapi.patch
Normal file
106
accel-kvm-kvm-all-Refactor-per-vcpu-dirty-ring-reapi.patch
Normal file
@ -0,0 +1,106 @@
|
||||
From c6f781e50e75fc2e6b819291b6c5ce6c212f018b Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?=
|
||||
<huangy81@chinatelecom.cn>
|
||||
Date: Sun, 26 Jun 2022 01:38:30 +0800
|
||||
Subject: [PATCH 1/3] accel/kvm/kvm-all: Refactor per-vcpu dirty ring reaping
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Add an optional 'CPUState' argument to kvm_dirty_ring_reap so
|
||||
that it can cover the single-vcpu dirty-ring-reaping scenario.
|
||||
|
||||
Signed-off-by: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
|
||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
||||
Message-Id: <c32001242875e83b0d9f78f396fe2dcd380ba9e8.1656177590.git.huangy81@chinatelecom.cn>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
---
|
||||
accel/kvm/kvm-all.c | 23 +++++++++++++----------
|
||||
1 file changed, 13 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||
index f2ce5cd45a..3bc6eb6294 100644
|
||||
--- a/accel/kvm/kvm-all.c
|
||||
+++ b/accel/kvm/kvm-all.c
|
||||
@@ -773,17 +773,20 @@ static uint32_t kvm_dirty_ring_reap_one(KVMState *s, CPUState *cpu)
|
||||
}
|
||||
|
||||
/* Must be with slots_lock held */
|
||||
-static uint64_t kvm_dirty_ring_reap_locked(KVMState *s)
|
||||
+static uint64_t kvm_dirty_ring_reap_locked(KVMState *s, CPUState* cpu)
|
||||
{
|
||||
int ret;
|
||||
- CPUState *cpu;
|
||||
uint64_t total = 0;
|
||||
int64_t stamp;
|
||||
|
||||
stamp = get_clock();
|
||||
|
||||
- CPU_FOREACH(cpu) {
|
||||
- total += kvm_dirty_ring_reap_one(s, cpu);
|
||||
+ if (cpu) {
|
||||
+ total = kvm_dirty_ring_reap_one(s, cpu);
|
||||
+ } else {
|
||||
+ CPU_FOREACH(cpu) {
|
||||
+ total += kvm_dirty_ring_reap_one(s, cpu);
|
||||
+ }
|
||||
}
|
||||
|
||||
if (total) {
|
||||
@@ -804,7 +807,7 @@ static uint64_t kvm_dirty_ring_reap_locked(KVMState *s)
|
||||
* Currently for simplicity, we must hold BQL before calling this. We can
|
||||
* consider to drop the BQL if we're clear with all the race conditions.
|
||||
*/
|
||||
-static uint64_t kvm_dirty_ring_reap(KVMState *s)
|
||||
+static uint64_t kvm_dirty_ring_reap(KVMState *s, CPUState *cpu)
|
||||
{
|
||||
uint64_t total;
|
||||
|
||||
@@ -824,7 +827,7 @@ static uint64_t kvm_dirty_ring_reap(KVMState *s)
|
||||
* reset below.
|
||||
*/
|
||||
kvm_slots_lock();
|
||||
- total = kvm_dirty_ring_reap_locked(s);
|
||||
+ total = kvm_dirty_ring_reap_locked(s, cpu);
|
||||
kvm_slots_unlock();
|
||||
|
||||
return total;
|
||||
@@ -871,7 +874,7 @@ static void kvm_dirty_ring_flush(void)
|
||||
* vcpus out in a synchronous way.
|
||||
*/
|
||||
kvm_cpu_synchronize_kick_all();
|
||||
- kvm_dirty_ring_reap(kvm_state);
|
||||
+ kvm_dirty_ring_reap(kvm_state, NULL);
|
||||
trace_kvm_dirty_ring_flush(1);
|
||||
}
|
||||
|
||||
@@ -1415,7 +1418,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
|
||||
* Not easy. Let's cross the fingers until it's fixed.
|
||||
*/
|
||||
if (kvm_state->kvm_dirty_ring_size) {
|
||||
- kvm_dirty_ring_reap_locked(kvm_state);
|
||||
+ kvm_dirty_ring_reap_locked(kvm_state, NULL);
|
||||
} else {
|
||||
kvm_slot_get_dirty_log(kvm_state, mem);
|
||||
}
|
||||
@@ -1487,7 +1490,7 @@ static void *kvm_dirty_ring_reaper_thread(void *data)
|
||||
r->reaper_state = KVM_DIRTY_RING_REAPER_REAPING;
|
||||
|
||||
qemu_mutex_lock_iothread();
|
||||
- kvm_dirty_ring_reap(s);
|
||||
+ kvm_dirty_ring_reap(s, NULL);
|
||||
qemu_mutex_unlock_iothread();
|
||||
|
||||
r->reaper_iteration++;
|
||||
@@ -2957,7 +2960,7 @@ int kvm_cpu_exec(CPUState *cpu)
|
||||
*/
|
||||
trace_kvm_dirty_ring_full(cpu->cpu_index);
|
||||
qemu_mutex_lock_iothread();
|
||||
- kvm_dirty_ring_reap(kvm_state);
|
||||
+ kvm_dirty_ring_reap(kvm_state, NULL);
|
||||
qemu_mutex_unlock_iothread();
|
||||
ret = 0;
|
||||
break;
|
||||
--
|
||||
2.27.0
|
||||
|
||||
73
cpus-Introduce-cpu_list_generation_id.patch
Normal file
73
cpus-Introduce-cpu_list_generation_id.patch
Normal file
@ -0,0 +1,73 @@
|
||||
From 6e057dd5df580f0e525d808f5476ee973280371d Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?=
|
||||
<huangy81@chinatelecom.cn>
|
||||
Date: Sun, 26 Jun 2022 01:38:31 +0800
|
||||
Subject: [PATCH 2/3] cpus: Introduce cpu_list_generation_id
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Introduce cpu_list_generation_id to track cpu list generation so
|
||||
that cpu hotplug/unplug can be detected during measurement of
|
||||
dirty page rate.
|
||||
|
||||
cpu_list_generation_id could be used to detect changes of cpu
|
||||
list, which is prepared for dirty page rate measurement.
|
||||
|
||||
Signed-off-by: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
|
||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
||||
Message-Id: <06e1f1362b2501a471dce796abb065b04f320fa5.1656177590.git.huangy81@chinatelecom.cn>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
---
|
||||
cpus-common.c | 8 ++++++++
|
||||
include/exec/cpu-common.h | 1 +
|
||||
2 files changed, 9 insertions(+)
|
||||
|
||||
diff --git a/cpus-common.c b/cpus-common.c
|
||||
index 6e73d3e58d..31c6415f37 100644
|
||||
--- a/cpus-common.c
|
||||
+++ b/cpus-common.c
|
||||
@@ -73,6 +73,12 @@ static int cpu_get_free_index(void)
|
||||
}
|
||||
|
||||
CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
|
||||
+static unsigned int cpu_list_generation_id;
|
||||
+
|
||||
+unsigned int cpu_list_generation_id_get(void)
|
||||
+{
|
||||
+ return cpu_list_generation_id;
|
||||
+}
|
||||
|
||||
void cpu_list_add(CPUState *cpu)
|
||||
{
|
||||
@@ -84,6 +90,7 @@ void cpu_list_add(CPUState *cpu)
|
||||
assert(!cpu_index_auto_assigned);
|
||||
}
|
||||
QTAILQ_INSERT_TAIL_RCU(&cpus, cpu, node);
|
||||
+ cpu_list_generation_id++;
|
||||
}
|
||||
|
||||
void cpu_list_remove(CPUState *cpu)
|
||||
@@ -96,6 +103,7 @@ void cpu_list_remove(CPUState *cpu)
|
||||
|
||||
QTAILQ_REMOVE_RCU(&cpus, cpu, node);
|
||||
cpu->cpu_index = UNASSIGNED_CPU_INDEX;
|
||||
+ cpu_list_generation_id++;
|
||||
}
|
||||
|
||||
CPUState *qemu_get_cpu(int index)
|
||||
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
|
||||
index 039d422bf4..cdee668f20 100644
|
||||
--- a/include/exec/cpu-common.h
|
||||
+++ b/include/exec/cpu-common.h
|
||||
@@ -11,6 +11,7 @@
|
||||
void qemu_init_cpu_list(void);
|
||||
void cpu_list_lock(void);
|
||||
void cpu_list_unlock(void);
|
||||
+unsigned int cpu_list_generation_id_get(void);
|
||||
|
||||
void tcg_flush_softmmu_tlb(CPUState *cs);
|
||||
|
||||
--
|
||||
2.27.0
|
||||
|
||||
25
fix-compilation-errors-of-sw64-architecture-on-x86-p.patch
Normal file
25
fix-compilation-errors-of-sw64-architecture-on-x86-p.patch
Normal file
@ -0,0 +1,25 @@
|
||||
From 58471cd8dcf8e6a66113ddf9bb4ac45c89bbd57b Mon Sep 17 00:00:00 2001
|
||||
From: lifeng 71117973 <lif121@chinatelecom.cn>
|
||||
Date: Wed, 2 Nov 2022 11:19:55 +0800
|
||||
Subject: [PATCH 1/2] fix compilation errors of sw64 architecture on x86
|
||||
platform
|
||||
|
||||
---
|
||||
target/sw64/float_helper.c | 1 -
|
||||
1 file changed, 1 deletion(-)
|
||||
|
||||
diff --git a/target/sw64/float_helper.c b/target/sw64/float_helper.c
|
||||
index ad1c3cce48..c8e0845afc 100644
|
||||
--- a/target/sw64/float_helper.c
|
||||
+++ b/target/sw64/float_helper.c
|
||||
@@ -653,7 +653,6 @@ void helper_ieee_input(CPUSW64State *env, uint64_t val)
|
||||
{
|
||||
#ifndef CONFIG_USER_ONLY
|
||||
uint32_t exp = (uint32_t)(val >> 52) & 0x7ff;
|
||||
- uint64_t frac = val & 0xfffffffffffffull;
|
||||
|
||||
if (exp == 0x7ff) {
|
||||
/* Infinity or NaN. */
|
||||
--
|
||||
2.27.0
|
||||
|
||||
28
fixed-the-error-that-no-bios-file-soft-link-was-crea.patch
Normal file
28
fixed-the-error-that-no-bios-file-soft-link-was-crea.patch
Normal file
@ -0,0 +1,28 @@
|
||||
From cf6be03a1f5b7595a2ecada71fa8aa30de744703 Mon Sep 17 00:00:00 2001
|
||||
From: lifeng 71117973 <lif121@chinatelecom.cn>
|
||||
Date: Wed, 2 Nov 2022 17:20:50 +0800
|
||||
Subject: [PATCH 2/2] fixed the error that no bios file soft link was created
|
||||
in the build directory when compiling the sw64 architecture
|
||||
|
||||
---
|
||||
configure | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/configure b/configure
|
||||
index 9569d7a3d0..0ae7bcf065 100755
|
||||
--- a/configure
|
||||
+++ b/configure
|
||||
@@ -3861,7 +3861,9 @@ for bios_file in \
|
||||
$source_path/pc-bios/u-boot.* \
|
||||
$source_path/pc-bios/edk2-*.fd.bz2 \
|
||||
$source_path/pc-bios/palcode-* \
|
||||
- $source_path/pc-bios/qemu_vga.ndrv
|
||||
+ $source_path/pc-bios/qemu_vga.ndrv \
|
||||
+ $source_path/pc-bios/core* \
|
||||
+ $source_path/pc-bios/uefi-bios-sw
|
||||
|
||||
do
|
||||
LINKS="$LINKS pc-bios/$(basename $bios_file)"
|
||||
--
|
||||
2.27.0
|
||||
|
||||
34
hw-vhost-user-blk-turn-on-VIRTIO_BLK_F_SIZE_MAX-feat.patch
Normal file
34
hw-vhost-user-blk-turn-on-VIRTIO_BLK_F_SIZE_MAX-feat.patch
Normal file
@ -0,0 +1,34 @@
|
||||
From 4f66d261c0f20189e387de57baca17167cc542ab Mon Sep 17 00:00:00 2001
|
||||
From: Andy Pei <andy.pei@intel.com>
|
||||
Date: Mon, 3 Jan 2022 17:28:12 +0800
|
||||
Subject: [PATCH] hw/vhost-user-blk: turn on VIRTIO_BLK_F_SIZE_MAX feature for
|
||||
virtio blk device
|
||||
|
||||
Turn on pre-defined feature VIRTIO_BLK_F_SIZE_MAX for virtio blk device to
|
||||
avoid guest DMA request sizes which are too large for hardware spec.
|
||||
|
||||
Signed-off-by: dinglimin <dinglimin@cmss.chinamobile.com>
|
||||
Signed-off-by: Andy Pei <andy.pei@intel.com>
|
||||
Message-Id: <1641202092-149677-1-git-send-email-andy.pei@intel.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Acked-by: Raphael Norwitz <raphael.norwitz@nutanix.com>
|
||||
---
|
||||
hw/block/vhost-user-blk.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
|
||||
index ba13cb87e5..eb1264afc7 100644
|
||||
--- a/hw/block/vhost-user-blk.c
|
||||
+++ b/hw/block/vhost-user-blk.c
|
||||
@@ -252,6 +252,7 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
|
||||
VHostUserBlk *s = VHOST_USER_BLK(vdev);
|
||||
|
||||
/* Turn on pre-defined features */
|
||||
+ virtio_add_feature(&features, VIRTIO_BLK_F_SIZE_MAX);
|
||||
virtio_add_feature(&features, VIRTIO_BLK_F_SEG_MAX);
|
||||
virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY);
|
||||
virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY);
|
||||
--
|
||||
2.27.0
|
||||
|
||||
65
linux-headers-include-missing-changes-from-5.17.patch
Normal file
65
linux-headers-include-missing-changes-from-5.17.patch
Normal file
@ -0,0 +1,65 @@
|
||||
From d6398243714a7a775c64e74dbd63c00863cb7e83 Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Tue, 22 Feb 2022 17:58:11 +0100
|
||||
Subject: [PATCH 01/10] linux-headers: include missing changes from 5.17
|
||||
|
||||
mainline inclusion
|
||||
from mainline-v7.0.0-rc0
|
||||
commit 1ea5208febcc068449b63282d72bb719ab67a466
|
||||
category: feature
|
||||
feature: SPR AMX support for Qemu
|
||||
bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB
|
||||
|
||||
Intel-SIG: commit 1ea5208febcc ("linux-headers: include missing changes from 5.17")
|
||||
|
||||
------------------------------------------------
|
||||
|
||||
linux-headers: include missing changes from 5.17
|
||||
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
|
||||
---
|
||||
linux-headers/asm-x86/kvm.h | 3 +++
|
||||
linux-headers/linux/kvm.h | 7 +++++++
|
||||
2 files changed, 10 insertions(+)
|
||||
|
||||
diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
|
||||
index a6c327f8ad..2ab4f1818a 100644
|
||||
--- a/linux-headers/asm-x86/kvm.h
|
||||
+++ b/linux-headers/asm-x86/kvm.h
|
||||
@@ -437,6 +437,9 @@ struct kvm_sync_regs {
|
||||
|
||||
#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001
|
||||
|
||||
+/* attributes for system fd (group 0) */
|
||||
+#define KVM_X86_XCOMP_GUEST_SUPP 0
|
||||
+
|
||||
struct kvm_vmx_nested_state_data {
|
||||
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
|
||||
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
|
||||
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
|
||||
index 5d8e42b8f8..7870cd0280 100644
|
||||
--- a/linux-headers/linux/kvm.h
|
||||
+++ b/linux-headers/linux/kvm.h
|
||||
@@ -1112,6 +1112,10 @@ struct kvm_ppc_resize_hpt {
|
||||
#define KVM_CAP_BINARY_STATS_FD 203
|
||||
#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
|
||||
#define KVM_CAP_ARM_MTE 205
|
||||
+#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
|
||||
+#define KVM_CAP_VM_GPA_BITS 207
|
||||
+#define KVM_CAP_XSAVE2 208
|
||||
+#define KVM_CAP_SYS_ATTRIBUTES 209
|
||||
|
||||
#define KVM_CAP_ARM_CPU_FEATURE 555
|
||||
|
||||
@@ -2006,4 +2010,7 @@ struct kvm_stats_desc {
|
||||
|
||||
#define KVM_GET_STATS_FD _IO(KVMIO, 0xce)
|
||||
|
||||
+/* Available with KVM_CAP_XSAVE2 */
|
||||
+#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)
|
||||
+
|
||||
#endif /* __LINUX_KVM_H */
|
||||
--
|
||||
2.27.0
|
||||
|
||||
399
migration-dirtyrate-Refactor-dirty-page-rate-calcula.patch
Normal file
399
migration-dirtyrate-Refactor-dirty-page-rate-calcula.patch
Normal file
@ -0,0 +1,399 @@
|
||||
From b6d1e022b7bb06faf2dcad3062b7061b59ef68a9 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?=
|
||||
<huangy81@chinatelecom.cn>
|
||||
Date: Sun, 26 Jun 2022 01:38:32 +0800
|
||||
Subject: [PATCH 3/3] migration/dirtyrate: Refactor dirty page rate calculation
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
abstract out dirty log change logic into function
|
||||
global_dirty_log_change.
|
||||
|
||||
abstract out dirty page rate calculation logic via
|
||||
dirty-ring into function vcpu_calculate_dirtyrate.
|
||||
|
||||
abstract out mathematical dirty page rate calculation
|
||||
into do_calculate_dirtyrate, decouple it from DirtyStat.
|
||||
|
||||
rename set_sample_page_period to dirty_stat_wait, which
|
||||
is well-understood and will be reused in dirtylimit.
|
||||
|
||||
handle cpu hotplug/unplug scenario during measurement of
|
||||
dirty page rate.
|
||||
|
||||
export util functions outside migration.
|
||||
|
||||
Signed-off-by: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
|
||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
||||
Message-Id: <7b6f6f4748d5b3d017b31a0429e630229ae97538.1656177590.git.huangy81@chinatelecom.cn>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
---
|
||||
include/sysemu/dirtyrate.h | 28 +++++
|
||||
migration/dirtyrate.c | 227 +++++++++++++++++++++++--------------
|
||||
migration/dirtyrate.h | 7 +-
|
||||
3 files changed, 174 insertions(+), 88 deletions(-)
|
||||
create mode 100644 include/sysemu/dirtyrate.h
|
||||
|
||||
diff --git a/include/sysemu/dirtyrate.h b/include/sysemu/dirtyrate.h
|
||||
new file mode 100644
|
||||
index 0000000000..4d3b9a4902
|
||||
--- /dev/null
|
||||
+++ b/include/sysemu/dirtyrate.h
|
||||
@@ -0,0 +1,28 @@
|
||||
+/*
|
||||
+ * dirty page rate helper functions
|
||||
+ *
|
||||
+ * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
|
||||
+ *
|
||||
+ * Authors:
|
||||
+ * Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
|
||||
+ *
|
||||
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
+ * See the COPYING file in the top-level directory.
|
||||
+ */
|
||||
+
|
||||
+#ifndef QEMU_DIRTYRATE_H
|
||||
+#define QEMU_DIRTYRATE_H
|
||||
+
|
||||
+typedef struct VcpuStat {
|
||||
+ int nvcpu; /* number of vcpu */
|
||||
+ DirtyRateVcpu *rates; /* array of dirty rate for each vcpu */
|
||||
+} VcpuStat;
|
||||
+
|
||||
+int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms,
|
||||
+ VcpuStat *stat,
|
||||
+ unsigned int flag,
|
||||
+ bool one_shot);
|
||||
+
|
||||
+void global_dirty_log_change(unsigned int flag,
|
||||
+ bool start);
|
||||
+#endif
|
||||
diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
|
||||
index 8043bc7946..c449095fc3 100644
|
||||
--- a/migration/dirtyrate.c
|
||||
+++ b/migration/dirtyrate.c
|
||||
@@ -46,7 +46,7 @@ static struct DirtyRateStat DirtyStat;
|
||||
static DirtyRateMeasureMode dirtyrate_mode =
|
||||
DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
|
||||
|
||||
-static int64_t set_sample_page_period(int64_t msec, int64_t initial_time)
|
||||
+static int64_t dirty_stat_wait(int64_t msec, int64_t initial_time)
|
||||
{
|
||||
int64_t current_time;
|
||||
|
||||
@@ -60,6 +60,132 @@ static int64_t set_sample_page_period(int64_t msec, int64_t initial_time)
|
||||
return msec;
|
||||
}
|
||||
|
||||
+static inline void record_dirtypages(DirtyPageRecord *dirty_pages,
|
||||
+ CPUState *cpu, bool start)
|
||||
+{
|
||||
+ if (start) {
|
||||
+ dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages;
|
||||
+ } else {
|
||||
+ dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static int64_t do_calculate_dirtyrate(DirtyPageRecord dirty_pages,
|
||||
+ int64_t calc_time_ms)
|
||||
+{
|
||||
+ uint64_t memory_size_MB;
|
||||
+ uint64_t increased_dirty_pages =
|
||||
+ dirty_pages.end_pages - dirty_pages.start_pages;
|
||||
+
|
||||
+ memory_size_MB = (increased_dirty_pages * TARGET_PAGE_SIZE) >> 20;
|
||||
+
|
||||
+ return memory_size_MB * 1000 / calc_time_ms;
|
||||
+}
|
||||
+
|
||||
+void global_dirty_log_change(unsigned int flag, bool start)
|
||||
+{
|
||||
+ qemu_mutex_lock_iothread();
|
||||
+ if (start) {
|
||||
+ memory_global_dirty_log_start(flag);
|
||||
+ } else {
|
||||
+ memory_global_dirty_log_stop(flag);
|
||||
+ }
|
||||
+ qemu_mutex_unlock_iothread();
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * global_dirty_log_sync
|
||||
+ * 1. sync dirty log from kvm
|
||||
+ * 2. stop dirty tracking if needed.
|
||||
+ */
|
||||
+static void global_dirty_log_sync(unsigned int flag, bool one_shot)
|
||||
+{
|
||||
+ qemu_mutex_lock_iothread();
|
||||
+ memory_global_dirty_log_sync();
|
||||
+ if (one_shot) {
|
||||
+ memory_global_dirty_log_stop(flag);
|
||||
+ }
|
||||
+ qemu_mutex_unlock_iothread();
|
||||
+}
|
||||
+
|
||||
+static DirtyPageRecord *vcpu_dirty_stat_alloc(VcpuStat *stat)
|
||||
+{
|
||||
+ CPUState *cpu;
|
||||
+ DirtyPageRecord *records;
|
||||
+ int nvcpu = 0;
|
||||
+
|
||||
+ CPU_FOREACH(cpu) {
|
||||
+ nvcpu++;
|
||||
+ }
|
||||
+
|
||||
+ stat->nvcpu = nvcpu;
|
||||
+ stat->rates = g_malloc0(sizeof(DirtyRateVcpu) * nvcpu);
|
||||
+
|
||||
+ records = g_malloc0(sizeof(DirtyPageRecord) * nvcpu);
|
||||
+
|
||||
+ return records;
|
||||
+}
|
||||
+
|
||||
+static void vcpu_dirty_stat_collect(VcpuStat *stat,
|
||||
+ DirtyPageRecord *records,
|
||||
+ bool start)
|
||||
+{
|
||||
+ CPUState *cpu;
|
||||
+
|
||||
+ CPU_FOREACH(cpu) {
|
||||
+ record_dirtypages(records, cpu, start);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms,
|
||||
+ VcpuStat *stat,
|
||||
+ unsigned int flag,
|
||||
+ bool one_shot)
|
||||
+{
|
||||
+ DirtyPageRecord *records;
|
||||
+ int64_t init_time_ms;
|
||||
+ int64_t duration;
|
||||
+ int64_t dirtyrate;
|
||||
+ int i = 0;
|
||||
+ unsigned int gen_id;
|
||||
+
|
||||
+retry:
|
||||
+ init_time_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
|
||||
+
|
||||
+ cpu_list_lock();
|
||||
+ gen_id = cpu_list_generation_id_get();
|
||||
+ records = vcpu_dirty_stat_alloc(stat);
|
||||
+ vcpu_dirty_stat_collect(stat, records, true);
|
||||
+ cpu_list_unlock();
|
||||
+
|
||||
+ duration = dirty_stat_wait(calc_time_ms, init_time_ms);
|
||||
+
|
||||
+ global_dirty_log_sync(flag, one_shot);
|
||||
+
|
||||
+ cpu_list_lock();
|
||||
+ if (gen_id != cpu_list_generation_id_get()) {
|
||||
+ g_free(records);
|
||||
+ g_free(stat->rates);
|
||||
+ cpu_list_unlock();
|
||||
+ goto retry;
|
||||
+ }
|
||||
+ vcpu_dirty_stat_collect(stat, records, false);
|
||||
+ cpu_list_unlock();
|
||||
+
|
||||
+ for (i = 0; i < stat->nvcpu; i++) {
|
||||
+ dirtyrate = do_calculate_dirtyrate(records[i], duration);
|
||||
+
|
||||
+ stat->rates[i].id = i;
|
||||
+ stat->rates[i].dirty_rate = dirtyrate;
|
||||
+
|
||||
+ trace_dirtyrate_do_calculate_vcpu(i, dirtyrate);
|
||||
+ }
|
||||
+
|
||||
+ g_free(records);
|
||||
+
|
||||
+ return duration;
|
||||
+}
|
||||
+
|
||||
static bool is_sample_period_valid(int64_t sec)
|
||||
{
|
||||
if (sec < MIN_FETCH_DIRTYRATE_TIME_SEC ||
|
||||
@@ -396,44 +522,6 @@ static bool compare_page_hash_info(struct RamblockDirtyInfo *info,
|
||||
return true;
|
||||
}
|
||||
|
||||
-static inline void record_dirtypages(DirtyPageRecord *dirty_pages,
|
||||
- CPUState *cpu, bool start)
|
||||
-{
|
||||
- if (start) {
|
||||
- dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages;
|
||||
- } else {
|
||||
- dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages;
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-static void dirtyrate_global_dirty_log_start(void)
|
||||
-{
|
||||
- qemu_mutex_lock_iothread();
|
||||
- memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
|
||||
- qemu_mutex_unlock_iothread();
|
||||
-}
|
||||
-
|
||||
-static void dirtyrate_global_dirty_log_stop(void)
|
||||
-{
|
||||
- qemu_mutex_lock_iothread();
|
||||
- memory_global_dirty_log_sync();
|
||||
- memory_global_dirty_log_stop(GLOBAL_DIRTY_DIRTY_RATE);
|
||||
- qemu_mutex_unlock_iothread();
|
||||
-}
|
||||
-
|
||||
-static int64_t do_calculate_dirtyrate_vcpu(DirtyPageRecord dirty_pages)
|
||||
-{
|
||||
- uint64_t memory_size_MB;
|
||||
- int64_t time_s;
|
||||
- uint64_t increased_dirty_pages =
|
||||
- dirty_pages.end_pages - dirty_pages.start_pages;
|
||||
-
|
||||
- memory_size_MB = (increased_dirty_pages * TARGET_PAGE_SIZE) >> 20;
|
||||
- time_s = DirtyStat.calc_time;
|
||||
-
|
||||
- return memory_size_MB / time_s;
|
||||
-}
|
||||
-
|
||||
static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages,
|
||||
bool start)
|
||||
{
|
||||
@@ -444,11 +532,6 @@ static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages,
|
||||
}
|
||||
}
|
||||
|
||||
-static void do_calculate_dirtyrate_bitmap(DirtyPageRecord dirty_pages)
|
||||
-{
|
||||
- DirtyStat.dirty_rate = do_calculate_dirtyrate_vcpu(dirty_pages);
|
||||
-}
|
||||
-
|
||||
static inline void dirtyrate_manual_reset_protect(void)
|
||||
{
|
||||
RAMBlock *block = NULL;
|
||||
@@ -492,71 +575,49 @@ static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config)
|
||||
DirtyStat.start_time = start_time / 1000;
|
||||
|
||||
msec = config.sample_period_seconds * 1000;
|
||||
- msec = set_sample_page_period(msec, start_time);
|
||||
+ msec = dirty_stat_wait(msec, start_time);
|
||||
DirtyStat.calc_time = msec / 1000;
|
||||
|
||||
/*
|
||||
- * dirtyrate_global_dirty_log_stop do two things.
|
||||
+ * do two things.
|
||||
* 1. fetch dirty bitmap from kvm
|
||||
* 2. stop dirty tracking
|
||||
*/
|
||||
- dirtyrate_global_dirty_log_stop();
|
||||
+ global_dirty_log_sync(GLOBAL_DIRTY_DIRTY_RATE, true);
|
||||
|
||||
record_dirtypages_bitmap(&dirty_pages, false);
|
||||
|
||||
- do_calculate_dirtyrate_bitmap(dirty_pages);
|
||||
+ DirtyStat.dirty_rate = do_calculate_dirtyrate(dirty_pages, msec);
|
||||
}
|
||||
|
||||
static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config)
|
||||
{
|
||||
- CPUState *cpu;
|
||||
- int64_t msec = 0;
|
||||
- int64_t start_time;
|
||||
+ int64_t duration;
|
||||
uint64_t dirtyrate = 0;
|
||||
uint64_t dirtyrate_sum = 0;
|
||||
- DirtyPageRecord *dirty_pages;
|
||||
- int nvcpu = 0;
|
||||
int i = 0;
|
||||
|
||||
- CPU_FOREACH(cpu) {
|
||||
- nvcpu++;
|
||||
- }
|
||||
-
|
||||
- dirty_pages = g_new(DirtyPageRecord, nvcpu);
|
||||
-
|
||||
- DirtyStat.dirty_ring.nvcpu = nvcpu;
|
||||
- DirtyStat.dirty_ring.rates = g_new(DirtyRateVcpu, nvcpu);
|
||||
-
|
||||
- dirtyrate_global_dirty_log_start();
|
||||
-
|
||||
- CPU_FOREACH(cpu) {
|
||||
- record_dirtypages(dirty_pages, cpu, true);
|
||||
- }
|
||||
-
|
||||
- start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
|
||||
- DirtyStat.start_time = start_time / 1000;
|
||||
+ /* start log sync */
|
||||
+ global_dirty_log_change(GLOBAL_DIRTY_DIRTY_RATE, true);
|
||||
|
||||
- msec = config.sample_period_seconds * 1000;
|
||||
- msec = set_sample_page_period(msec, start_time);
|
||||
- DirtyStat.calc_time = msec / 1000;
|
||||
+ DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
|
||||
|
||||
- dirtyrate_global_dirty_log_stop();
|
||||
+ /* calculate vcpu dirtyrate */
|
||||
+ duration = vcpu_calculate_dirtyrate(config.sample_period_seconds * 1000,
|
||||
+ &DirtyStat.dirty_ring,
|
||||
+ GLOBAL_DIRTY_DIRTY_RATE,
|
||||
+ true);
|
||||
|
||||
- CPU_FOREACH(cpu) {
|
||||
- record_dirtypages(dirty_pages, cpu, false);
|
||||
- }
|
||||
+ DirtyStat.calc_time = duration / 1000;
|
||||
|
||||
+ /* calculate vm dirtyrate */
|
||||
for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
|
||||
- dirtyrate = do_calculate_dirtyrate_vcpu(dirty_pages[i]);
|
||||
- trace_dirtyrate_do_calculate_vcpu(i, dirtyrate);
|
||||
-
|
||||
- DirtyStat.dirty_ring.rates[i].id = i;
|
||||
+ dirtyrate = DirtyStat.dirty_ring.rates[i].dirty_rate;
|
||||
DirtyStat.dirty_ring.rates[i].dirty_rate = dirtyrate;
|
||||
dirtyrate_sum += dirtyrate;
|
||||
}
|
||||
|
||||
DirtyStat.dirty_rate = dirtyrate_sum;
|
||||
- g_free(dirty_pages);
|
||||
}
|
||||
|
||||
static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)
|
||||
@@ -574,7 +635,7 @@ static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)
|
||||
rcu_read_unlock();
|
||||
|
||||
msec = config.sample_period_seconds * 1000;
|
||||
- msec = set_sample_page_period(msec, initial_time);
|
||||
+ msec = dirty_stat_wait(msec, initial_time);
|
||||
DirtyStat.start_time = initial_time / 1000;
|
||||
DirtyStat.calc_time = msec / 1000;
|
||||
|
||||
diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h
|
||||
index 69d4c5b865..594a5c0bb6 100644
|
||||
--- a/migration/dirtyrate.h
|
||||
+++ b/migration/dirtyrate.h
|
||||
@@ -13,6 +13,8 @@
|
||||
#ifndef QEMU_MIGRATION_DIRTYRATE_H
|
||||
#define QEMU_MIGRATION_DIRTYRATE_H
|
||||
|
||||
+#include "sysemu/dirtyrate.h"
|
||||
+
|
||||
/*
|
||||
* Sample 512 pages per GB as default.
|
||||
*/
|
||||
@@ -65,11 +67,6 @@ typedef struct SampleVMStat {
|
||||
uint64_t total_block_mem_MB; /* size of total sampled pages in MB */
|
||||
} SampleVMStat;
|
||||
|
||||
-typedef struct VcpuStat {
|
||||
- int nvcpu; /* number of vcpu */
|
||||
- DirtyRateVcpu *rates; /* array of dirty rate for each vcpu */
|
||||
-} VcpuStat;
|
||||
-
|
||||
/*
|
||||
* Store calculation statistics for each measure.
|
||||
*/
|
||||
--
|
||||
2.27.0
|
||||
|
||||
48
migration-dirtyrate-Replace-malloc-with-g_new.patch
Normal file
48
migration-dirtyrate-Replace-malloc-with-g_new.patch
Normal file
@ -0,0 +1,48 @@
|
||||
From 7cb2d342b9073ec9548202df6e1fb25fa4997d71 Mon Sep 17 00:00:00 2001
|
||||
From: jianchunfu <jianchunfu_yewu@cmss.chinamobile.com>
|
||||
Date: Thu, 30 Jun 2022 11:34:50 +0000
|
||||
Subject: [PATCH] migration/dirtyrate: Replace malloc with g_new Using macro
|
||||
g_new() to handle potential memory allocation failures in dirtyrate.
|
||||
|
||||
---
|
||||
migration/dirtyrate.c | 8 ++++----
|
||||
1 file changed, 4 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
|
||||
index d65e744af9..8043bc7946 100644
|
||||
--- a/migration/dirtyrate.c
|
||||
+++ b/migration/dirtyrate.c
|
||||
@@ -157,7 +157,7 @@ static void cleanup_dirtyrate_stat(struct DirtyRateConfig config)
|
||||
{
|
||||
/* last calc-dirty-rate qmp use dirty ring mode */
|
||||
if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
|
||||
- free(DirtyStat.dirty_ring.rates);
|
||||
+ g_free(DirtyStat.dirty_ring.rates);
|
||||
DirtyStat.dirty_ring.rates = NULL;
|
||||
}
|
||||
}
|
||||
@@ -522,10 +522,10 @@ static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config)
|
||||
nvcpu++;
|
||||
}
|
||||
|
||||
- dirty_pages = malloc(sizeof(*dirty_pages) * nvcpu);
|
||||
+ dirty_pages = g_new(DirtyPageRecord, nvcpu);
|
||||
|
||||
DirtyStat.dirty_ring.nvcpu = nvcpu;
|
||||
- DirtyStat.dirty_ring.rates = malloc(sizeof(DirtyRateVcpu) * nvcpu);
|
||||
+ DirtyStat.dirty_ring.rates = g_new(DirtyRateVcpu, nvcpu);
|
||||
|
||||
dirtyrate_global_dirty_log_start();
|
||||
|
||||
@@ -556,7 +556,7 @@ static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config)
|
||||
}
|
||||
|
||||
DirtyStat.dirty_rate = dirtyrate_sum;
|
||||
- free(dirty_pages);
|
||||
+ g_free(dirty_pages);
|
||||
}
|
||||
|
||||
static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)
|
||||
--
|
||||
2.27.0
|
||||
|
||||
31
qemu.spec
31
qemu.spec
@ -1,6 +1,6 @@
|
||||
Name: qemu
|
||||
Version: 6.2.0
|
||||
Release: 54
|
||||
Release: 55
|
||||
Epoch: 10
|
||||
Summary: QEMU is a generic and open source machine emulator and virtualizer
|
||||
License: GPLv2 and BSD and MIT and CC-BY-SA-4.0
|
||||
@ -317,6 +317,30 @@ Patch0304: pci-expose-TYPE_XIO3130_DOWNSTREAM-name.patch
|
||||
Patch0305: acpi-pcihp-pcie-set-power-on-cap-on-parent-slot.patch
|
||||
Patch0306: hw-display-ati_2d-Fix-buffer-overflow-in-ati_2d_blt-.patch
|
||||
Patch0307: ui-vnc-clipboard-fix-integer-underflow-in-vnc_client.patch
|
||||
Patch0308: Remove-the-unused-local-variable-records.patch
|
||||
Patch0309: Remove-this-redundant-return.patch
|
||||
Patch0310: hw-vhost-user-blk-turn-on-VIRTIO_BLK_F_SIZE_MAX-feat.patch
|
||||
Patch0311: migration-dirtyrate-Replace-malloc-with-g_new.patch
|
||||
Patch0312: accel-kvm-kvm-all-Refactor-per-vcpu-dirty-ring-reapi.patch
|
||||
Patch0313: cpus-Introduce-cpu_list_generation_id.patch
|
||||
Patch0314: migration-dirtyrate-Refactor-dirty-page-rate-calcula.patch
|
||||
Patch0315: softmmu-dirtylimit-Implement-vCPU-dirtyrate-calculat.patch
|
||||
Patch0316: accel-kvm-kvm-all-Introduce-kvm_dirty_ring_size-func.patch
|
||||
Patch0317: softmmu-dirtylimit-Implement-virtual-CPU-throttle.patch
|
||||
Patch0318: softmmu-dirtylimit-Implement-dirty-page-rate-limit.patch
|
||||
Patch0319: tests-Add-dirty-page-rate-limit-test.patch
|
||||
Patch0320: linux-headers-include-missing-changes-from-5.17.patch
|
||||
Patch0321: x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch
|
||||
Patch0322: x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch
|
||||
Patch0323: x86-Grant-AMX-permission-for-guest.patch
|
||||
Patch0324: x86-Add-XFD-faulting-bit-for-state-components.patch
|
||||
Patch0325: x86-Add-AMX-CPUIDs-enumeration.patch
|
||||
Patch0326: x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch
|
||||
Patch0327: x86-Support-XFD-and-AMX-xsave-data-migration.patch
|
||||
Patch0328: target-i386-kvm-do-not-access-uninitialized-variable.patch
|
||||
Patch0329: KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch
|
||||
Patch0330: fix-compilation-errors-of-sw64-architecture-on-x86-p.patch
|
||||
Patch0331: fixed-the-error-that-no-bios-file-soft-link-was-crea.patch
|
||||
|
||||
BuildRequires: flex
|
||||
BuildRequires: gcc
|
||||
@ -831,6 +855,11 @@ getent passwd qemu >/dev/null || \
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Thu Nov 03 2022 yezengruan <yezengruan@huawei.com> - 10:6.2.0-55
|
||||
- support dirty restraint on vCPU
|
||||
- support SPR AMX in Qemu
|
||||
- fix compilation errors of sw64
|
||||
|
||||
* Mon Oct 24 2022 fushanqing <fushanqing@kylinos.cn> - 10:6.2.0-54
|
||||
- add '--enable-slirp' compilation options
|
||||
|
||||
|
||||
435
softmmu-dirtylimit-Implement-dirty-page-rate-limit.patch
Normal file
435
softmmu-dirtylimit-Implement-dirty-page-rate-limit.patch
Normal file
@ -0,0 +1,435 @@
|
||||
From 39d9c1f6de01abf003980f4c2fe3c08f9e6cd60c Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?=
|
||||
<huangy81@chinatelecom.cn>
|
||||
Date: Sun, 26 Jun 2022 01:38:36 +0800
|
||||
Subject: [PATCH] softmmu/dirtylimit: Implement dirty page rate limit
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Implement dirtyrate calculation periodically basing on
|
||||
dirty-ring and throttle virtual CPU until it reaches the quota
|
||||
dirty page rate given by user.
|
||||
|
||||
Introduce qmp commands "set-vcpu-dirty-limit",
|
||||
"cancel-vcpu-dirty-limit", "query-vcpu-dirty-limit"
|
||||
to enable, disable, query dirty page limit for virtual CPU.
|
||||
|
||||
Meanwhile, introduce corresponding hmp commands
|
||||
"set_vcpu_dirty_limit", "cancel_vcpu_dirty_limit",
|
||||
"info vcpu_dirty_limit" so the feature can be more usable.
|
||||
|
||||
"query-vcpu-dirty-limit" success depends on enabling dirty
|
||||
page rate limit, so just add it to the list of skipped
|
||||
command to ensure qmp-cmd-test run successfully.
|
||||
|
||||
Signed-off-by: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
|
||||
Acked-by: Markus Armbruster <armbru@redhat.com>
|
||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
||||
Message-Id: <4143f26706d413dd29db0b672fe58b3d3fbe34bc.1656177590.git.huangy81@chinatelecom.cn>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
---
|
||||
hmp-commands-info.hx | 13 +++
|
||||
hmp-commands.hx | 32 ++++++
|
||||
include/monitor/hmp.h | 3 +
|
||||
qapi/migration.json | 80 +++++++++++++++
|
||||
softmmu/dirtylimit.c | 194 +++++++++++++++++++++++++++++++++++++
|
||||
tests/qtest/qmp-cmd-test.c | 2 +
|
||||
6 files changed, 324 insertions(+)
|
||||
|
||||
diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
|
||||
index 407a1da800..5dd3001af0 100644
|
||||
--- a/hmp-commands-info.hx
|
||||
+++ b/hmp-commands-info.hx
|
||||
@@ -863,6 +863,19 @@ SRST
|
||||
Display the vcpu dirty rate information.
|
||||
ERST
|
||||
|
||||
+ {
|
||||
+ .name = "vcpu_dirty_limit",
|
||||
+ .args_type = "",
|
||||
+ .params = "",
|
||||
+ .help = "show dirty page limit information of all vCPU",
|
||||
+ .cmd = hmp_info_vcpu_dirty_limit,
|
||||
+ },
|
||||
+
|
||||
+SRST
|
||||
+ ``info vcpu_dirty_limit``
|
||||
+ Display the vcpu dirty page limit information.
|
||||
+ERST
|
||||
+
|
||||
#if defined(TARGET_I386)
|
||||
{
|
||||
.name = "sgx",
|
||||
diff --git a/hmp-commands.hx b/hmp-commands.hx
|
||||
index 70a9136ac2..5bedee2d49 100644
|
||||
--- a/hmp-commands.hx
|
||||
+++ b/hmp-commands.hx
|
||||
@@ -1744,3 +1744,35 @@ ERST
|
||||
"\n\t\t\t -b to specify dirty bitmap as method of calculation)",
|
||||
.cmd = hmp_calc_dirty_rate,
|
||||
},
|
||||
+
|
||||
+SRST
|
||||
+``set_vcpu_dirty_limit``
|
||||
+ Set dirty page rate limit on virtual CPU, the information about all the
|
||||
+ virtual CPU dirty limit status can be observed with ``info vcpu_dirty_limit``
|
||||
+ command.
|
||||
+ERST
|
||||
+
|
||||
+ {
|
||||
+ .name = "set_vcpu_dirty_limit",
|
||||
+ .args_type = "dirty_rate:l,cpu_index:l?",
|
||||
+ .params = "dirty_rate [cpu_index]",
|
||||
+ .help = "set dirty page rate limit, use cpu_index to set limit"
|
||||
+ "\n\t\t\t\t\t on a specified virtual cpu",
|
||||
+ .cmd = hmp_set_vcpu_dirty_limit,
|
||||
+ },
|
||||
+
|
||||
+SRST
|
||||
+``cancel_vcpu_dirty_limit``
|
||||
+ Cancel dirty page rate limit on virtual CPU, the information about all the
|
||||
+ virtual CPU dirty limit status can be observed with ``info vcpu_dirty_limit``
|
||||
+ command.
|
||||
+ERST
|
||||
+
|
||||
+ {
|
||||
+ .name = "cancel_vcpu_dirty_limit",
|
||||
+ .args_type = "cpu_index:l?",
|
||||
+ .params = "[cpu_index]",
|
||||
+ .help = "cancel dirty page rate limit, use cpu_index to cancel"
|
||||
+ "\n\t\t\t\t\t limit on a specified virtual cpu",
|
||||
+ .cmd = hmp_cancel_vcpu_dirty_limit,
|
||||
+ },
|
||||
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
|
||||
index 96d014826a..478820e54f 100644
|
||||
--- a/include/monitor/hmp.h
|
||||
+++ b/include/monitor/hmp.h
|
||||
@@ -131,6 +131,9 @@ void hmp_replay_delete_break(Monitor *mon, const QDict *qdict);
|
||||
void hmp_replay_seek(Monitor *mon, const QDict *qdict);
|
||||
void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict);
|
||||
void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict);
|
||||
+void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict);
|
||||
+void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict);
|
||||
+void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict);
|
||||
void hmp_human_readable_text_helper(Monitor *mon,
|
||||
HumanReadableText *(*qmp_handler)(Error **));
|
||||
|
||||
diff --git a/qapi/migration.json b/qapi/migration.json
|
||||
index d4ebc5f028..fee266017d 100644
|
||||
--- a/qapi/migration.json
|
||||
+++ b/qapi/migration.json
|
||||
@@ -1874,6 +1874,86 @@
|
||||
##
|
||||
{ 'command': 'query-dirty-rate', 'returns': 'DirtyRateInfo' }
|
||||
|
||||
+##
|
||||
+# @DirtyLimitInfo:
|
||||
+#
|
||||
+# Dirty page rate limit information of a virtual CPU.
|
||||
+#
|
||||
+# @cpu-index: index of a virtual CPU.
|
||||
+#
|
||||
+# @limit-rate: upper limit of dirty page rate (MB/s) for a virtual
|
||||
+# CPU, 0 means unlimited.
|
||||
+#
|
||||
+# @current-rate: current dirty page rate (MB/s) for a virtual CPU.
|
||||
+#
|
||||
+# Since: 6.2
|
||||
+#
|
||||
+##
|
||||
+{ 'struct': 'DirtyLimitInfo',
|
||||
+ 'data': { 'cpu-index': 'int',
|
||||
+ 'limit-rate': 'uint64',
|
||||
+ 'current-rate': 'uint64' } }
|
||||
+
|
||||
+##
|
||||
+# @set-vcpu-dirty-limit:
|
||||
+#
|
||||
+# Set the upper limit of dirty page rate for virtual CPUs.
|
||||
+#
|
||||
+# Requires KVM with accelerator property "dirty-ring-size" set.
|
||||
+# A virtual CPU's dirty page rate is a measure of its memory load.
|
||||
+# To observe dirty page rates, use @calc-dirty-rate.
|
||||
+#
|
||||
+# @cpu-index: index of a virtual CPU, default is all.
|
||||
+#
|
||||
+# @dirty-rate: upper limit of dirty page rate (MB/s) for virtual CPUs.
|
||||
+#
|
||||
+# Since: 6.2
|
||||
+#
|
||||
+# Example:
|
||||
+# {"execute": "set-vcpu-dirty-limit",
|
||||
+# "arguments": { "dirty-rate": 200,
|
||||
+# "cpu-index": 1 } }
|
||||
+#
|
||||
+##
|
||||
+{ 'command': 'set-vcpu-dirty-limit',
|
||||
+ 'data': { '*cpu-index': 'int',
|
||||
+ 'dirty-rate': 'uint64' } }
|
||||
+
|
||||
+##
|
||||
+# @cancel-vcpu-dirty-limit:
|
||||
+#
|
||||
+# Cancel the upper limit of dirty page rate for virtual CPUs.
|
||||
+#
|
||||
+# Cancel the dirty page limit for the vCPU which has been set with
|
||||
+# set-vcpu-dirty-limit command. Note that this command requires
|
||||
+# support from dirty ring, same as the "set-vcpu-dirty-limit".
|
||||
+#
|
||||
+# @cpu-index: index of a virtual CPU, default is all.
|
||||
+#
|
||||
+# Since: 6.2
|
||||
+#
|
||||
+# Example:
|
||||
+# {"execute": "cancel-vcpu-dirty-limit",
|
||||
+# "arguments": { "cpu-index": 1 } }
|
||||
+#
|
||||
+##
|
||||
+{ 'command': 'cancel-vcpu-dirty-limit',
|
||||
+ 'data': { '*cpu-index': 'int'} }
|
||||
+
|
||||
+##
|
||||
+# @query-vcpu-dirty-limit:
|
||||
+#
|
||||
+# Returns information about virtual CPU dirty page rate limits, if any.
|
||||
+#
|
||||
+# Since: 6.2
|
||||
+#
|
||||
+# Example:
|
||||
+# {"execute": "query-vcpu-dirty-limit"}
|
||||
+#
|
||||
+##
|
||||
+{ 'command': 'query-vcpu-dirty-limit',
|
||||
+ 'returns': [ 'DirtyLimitInfo' ] }
|
||||
+
|
||||
##
|
||||
# @snapshot-save:
|
||||
#
|
||||
diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
|
||||
index e5a4f970bd..8d98cb7f2c 100644
|
||||
--- a/softmmu/dirtylimit.c
|
||||
+++ b/softmmu/dirtylimit.c
|
||||
@@ -14,8 +14,12 @@
|
||||
#include "qapi/error.h"
|
||||
#include "qemu/main-loop.h"
|
||||
#include "qapi/qapi-commands-migration.h"
|
||||
+#include "qapi/qmp/qdict.h"
|
||||
+#include "qapi/error.h"
|
||||
#include "sysemu/dirtyrate.h"
|
||||
#include "sysemu/dirtylimit.h"
|
||||
+#include "monitor/hmp.h"
|
||||
+#include "monitor/monitor.h"
|
||||
#include "exec/memory.h"
|
||||
#include "hw/boards.h"
|
||||
#include "sysemu/kvm.h"
|
||||
@@ -405,3 +409,193 @@ void dirtylimit_vcpu_execute(CPUState *cpu)
|
||||
usleep(cpu->throttle_us_per_full);
|
||||
}
|
||||
}
|
||||
+
|
||||
+static void dirtylimit_init(void)
|
||||
+{
|
||||
+ dirtylimit_state_initialize();
|
||||
+ dirtylimit_change(true);
|
||||
+ vcpu_dirty_rate_stat_initialize();
|
||||
+ vcpu_dirty_rate_stat_start();
|
||||
+}
|
||||
+
|
||||
+static void dirtylimit_cleanup(void)
|
||||
+{
|
||||
+ vcpu_dirty_rate_stat_stop();
|
||||
+ vcpu_dirty_rate_stat_finalize();
|
||||
+ dirtylimit_change(false);
|
||||
+ dirtylimit_state_finalize();
|
||||
+}
|
||||
+
|
||||
+void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,
|
||||
+ int64_t cpu_index,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
|
||||
+ error_setg(errp, "incorrect cpu index specified");
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ if (!dirtylimit_in_service()) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ dirtylimit_state_lock();
|
||||
+
|
||||
+ if (has_cpu_index) {
|
||||
+ dirtylimit_set_vcpu(cpu_index, 0, false);
|
||||
+ } else {
|
||||
+ dirtylimit_set_all(0, false);
|
||||
+ }
|
||||
+
|
||||
+ if (!dirtylimit_state->limited_nvcpu) {
|
||||
+ dirtylimit_cleanup();
|
||||
+ }
|
||||
+
|
||||
+ dirtylimit_state_unlock();
|
||||
+}
|
||||
+
|
||||
+void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
|
||||
+{
|
||||
+ int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
|
||||
+ Error *err = NULL;
|
||||
+
|
||||
+ qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err);
|
||||
+ if (err) {
|
||||
+ hmp_handle_error(mon, err);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
|
||||
+ "dirty limit for virtual CPU]\n");
|
||||
+}
|
||||
+
|
||||
+void qmp_set_vcpu_dirty_limit(bool has_cpu_index,
|
||||
+ int64_t cpu_index,
|
||||
+ uint64_t dirty_rate,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
|
||||
+ error_setg(errp, "dirty page limit feature requires KVM with"
|
||||
+ " accelerator property 'dirty-ring-size' set'");
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
|
||||
+ error_setg(errp, "incorrect cpu index specified");
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ if (!dirty_rate) {
|
||||
+ qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ dirtylimit_state_lock();
|
||||
+
|
||||
+ if (!dirtylimit_in_service()) {
|
||||
+ dirtylimit_init();
|
||||
+ }
|
||||
+
|
||||
+ if (has_cpu_index) {
|
||||
+ dirtylimit_set_vcpu(cpu_index, dirty_rate, true);
|
||||
+ } else {
|
||||
+ dirtylimit_set_all(dirty_rate, true);
|
||||
+ }
|
||||
+
|
||||
+ dirtylimit_state_unlock();
|
||||
+}
|
||||
+
|
||||
+void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
|
||||
+{
|
||||
+ int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate");
|
||||
+ int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
|
||||
+ Error *err = NULL;
|
||||
+
|
||||
+ qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err);
|
||||
+ if (err) {
|
||||
+ hmp_handle_error(mon, err);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
|
||||
+ "dirty limit for virtual CPU]\n");
|
||||
+}
|
||||
+
|
||||
+static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index)
|
||||
+{
|
||||
+ DirtyLimitInfo *info = NULL;
|
||||
+
|
||||
+ info = g_malloc0(sizeof(*info));
|
||||
+ info->cpu_index = cpu_index;
|
||||
+ info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota;
|
||||
+ info->current_rate = vcpu_dirty_rate_get(cpu_index);
|
||||
+
|
||||
+ return info;
|
||||
+}
|
||||
+
|
||||
+static struct DirtyLimitInfoList *dirtylimit_query_all(void)
|
||||
+{
|
||||
+ int i, index;
|
||||
+ DirtyLimitInfo *info = NULL;
|
||||
+ DirtyLimitInfoList *head = NULL, **tail = &head;
|
||||
+
|
||||
+ dirtylimit_state_lock();
|
||||
+
|
||||
+ if (!dirtylimit_in_service()) {
|
||||
+ dirtylimit_state_unlock();
|
||||
+ return NULL;
|
||||
+ }
|
||||
+
|
||||
+ for (i = 0; i < dirtylimit_state->max_cpus; i++) {
|
||||
+ index = dirtylimit_state->states[i].cpu_index;
|
||||
+ if (dirtylimit_vcpu_get_state(index)->enabled) {
|
||||
+ info = dirtylimit_query_vcpu(index);
|
||||
+ QAPI_LIST_APPEND(tail, info);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ dirtylimit_state_unlock();
|
||||
+
|
||||
+ return head;
|
||||
+}
|
||||
+
|
||||
+struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp)
|
||||
+{
|
||||
+ if (!dirtylimit_in_service()) {
|
||||
+ return NULL;
|
||||
+ }
|
||||
+
|
||||
+ return dirtylimit_query_all();
|
||||
+}
|
||||
+
|
||||
+void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
|
||||
+{
|
||||
+ DirtyLimitInfoList *limit, *head, *info = NULL;
|
||||
+ Error *err = NULL;
|
||||
+
|
||||
+ if (!dirtylimit_in_service()) {
|
||||
+ monitor_printf(mon, "Dirty page limit not enabled!\n");
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ info = qmp_query_vcpu_dirty_limit(&err);
|
||||
+ if (err) {
|
||||
+ hmp_handle_error(mon, err);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ head = info;
|
||||
+ for (limit = head; limit != NULL; limit = limit->next) {
|
||||
+ monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s),"
|
||||
+ " current rate %"PRIi64 " (MB/s)\n",
|
||||
+ limit->value->cpu_index,
|
||||
+ limit->value->limit_rate,
|
||||
+ limit->value->current_rate);
|
||||
+ }
|
||||
+
|
||||
+ qapi_free_DirtyLimitInfoList(info); /* free every node and value, not only the head */
|
||||
+}
|
||||
diff --git a/tests/qtest/qmp-cmd-test.c b/tests/qtest/qmp-cmd-test.c
|
||||
index 7f103ea3fd..4b216a0435 100644
|
||||
--- a/tests/qtest/qmp-cmd-test.c
|
||||
+++ b/tests/qtest/qmp-cmd-test.c
|
||||
@@ -110,6 +110,8 @@ static bool query_is_ignored(const char *cmd)
|
||||
"query-sev-capabilities",
|
||||
"query-sgx",
|
||||
"query-sgx-capabilities",
|
||||
+ /* Success depends on enabling dirty page rate limit */
|
||||
+ "query-vcpu-dirty-limit",
|
||||
NULL
|
||||
};
|
||||
int i;
|
||||
--
|
||||
2.27.0
|
||||
|
||||
214
softmmu-dirtylimit-Implement-vCPU-dirtyrate-calculat.patch
Normal file
214
softmmu-dirtylimit-Implement-vCPU-dirtyrate-calculat.patch
Normal file
@ -0,0 +1,214 @@
|
||||
From 1c1049bda8e91cc6015c32fc7cc9d0f16ad46b58 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?=
|
||||
<huangy81@chinatelecom.cn>
|
||||
Date: Sun, 26 Jun 2022 01:38:33 +0800
|
||||
Subject: [PATCH 1/3] softmmu/dirtylimit: Implement vCPU dirtyrate calculation
|
||||
periodically
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Introduce the third method GLOBAL_DIRTY_LIMIT of dirty
|
||||
tracking for calculating dirtyrate periodically for dirty page
|
||||
rate limit.
|
||||
|
||||
Add dirtylimit.c to implement dirtyrate calculation periodically,
|
||||
which will be used for dirty page rate limit.
|
||||
|
||||
Add dirtylimit.h to export util functions for dirty page rate
|
||||
limit implementation.
|
||||
|
||||
Signed-off-by: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
|
||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
||||
Message-Id: <5d0d641bffcb9b1c4cc3e323b6dfecb36050d948.1656177590.git.huangy81@chinatelecom.cn>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
---
|
||||
include/exec/memory.h | 5 +-
|
||||
include/sysemu/dirtylimit.h | 22 +++++++
|
||||
softmmu/dirtylimit.c | 116 ++++++++++++++++++++++++++++++++++++
|
||||
softmmu/meson.build | 1 +
|
||||
4 files changed, 143 insertions(+), 1 deletion(-)
|
||||
create mode 100644 include/sysemu/dirtylimit.h
|
||||
create mode 100644 softmmu/dirtylimit.c
|
||||
|
||||
diff --git a/include/exec/memory.h b/include/exec/memory.h
|
||||
index 3e84d62e40..4326d74b95 100644
|
||||
--- a/include/exec/memory.h
|
||||
+++ b/include/exec/memory.h
|
||||
@@ -69,7 +69,10 @@ static inline void fuzz_dma_read_cb(size_t addr,
|
||||
/* Dirty tracking enabled because measuring dirty rate */
|
||||
#define GLOBAL_DIRTY_DIRTY_RATE (1U << 1)
|
||||
|
||||
-#define GLOBAL_DIRTY_MASK (0x3)
|
||||
+/* Dirty tracking enabled because dirty limit */
|
||||
+#define GLOBAL_DIRTY_LIMIT (1U << 2)
|
||||
+
|
||||
+#define GLOBAL_DIRTY_MASK (0x7)
|
||||
|
||||
extern unsigned int global_dirty_tracking;
|
||||
|
||||
diff --git a/include/sysemu/dirtylimit.h b/include/sysemu/dirtylimit.h
|
||||
new file mode 100644
|
||||
index 0000000000..da459f03d6
|
||||
--- /dev/null
|
||||
+++ b/include/sysemu/dirtylimit.h
|
||||
@@ -0,0 +1,22 @@
|
||||
+/*
|
||||
+ * Dirty page rate limit common functions
|
||||
+ *
|
||||
+ * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
|
||||
+ *
|
||||
+ * Authors:
|
||||
+ * Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
|
||||
+ *
|
||||
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
+ * See the COPYING file in the top-level directory.
|
||||
+ */
|
||||
+#ifndef QEMU_DIRTYRLIMIT_H
|
||||
+#define QEMU_DIRTYRLIMIT_H
|
||||
+
|
||||
+#define DIRTYLIMIT_CALC_TIME_MS 1000 /* 1000ms */
|
||||
+
|
||||
+int64_t vcpu_dirty_rate_get(int cpu_index);
|
||||
+void vcpu_dirty_rate_stat_start(void);
|
||||
+void vcpu_dirty_rate_stat_stop(void);
|
||||
+void vcpu_dirty_rate_stat_initialize(void);
|
||||
+void vcpu_dirty_rate_stat_finalize(void);
|
||||
+#endif
|
||||
diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
|
||||
new file mode 100644
|
||||
index 0000000000..ebdc064c9d
|
||||
--- /dev/null
|
||||
+++ b/softmmu/dirtylimit.c
|
||||
@@ -0,0 +1,116 @@
|
||||
+/*
|
||||
+ * Dirty page rate limit implementation code
|
||||
+ *
|
||||
+ * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
|
||||
+ *
|
||||
+ * Authors:
|
||||
+ * Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
|
||||
+ *
|
||||
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
+ * See the COPYING file in the top-level directory.
|
||||
+ */
|
||||
+
|
||||
+#include "qemu/osdep.h"
|
||||
+#include "qapi/error.h"
|
||||
+#include "qemu/main-loop.h"
|
||||
+#include "qapi/qapi-commands-migration.h"
|
||||
+#include "sysemu/dirtyrate.h"
|
||||
+#include "sysemu/dirtylimit.h"
|
||||
+#include "exec/memory.h"
|
||||
+#include "hw/boards.h"
|
||||
+
|
||||
+struct {
|
||||
+ VcpuStat stat;
|
||||
+ bool running;
|
||||
+ QemuThread thread;
|
||||
+} *vcpu_dirty_rate_stat;
|
||||
+
|
||||
+static void vcpu_dirty_rate_stat_collect(void)
|
||||
+{
|
||||
+ VcpuStat stat;
|
||||
+ int i = 0;
|
||||
+
|
||||
+ /* calculate vcpu dirtyrate */
|
||||
+ vcpu_calculate_dirtyrate(DIRTYLIMIT_CALC_TIME_MS,
|
||||
+ &stat,
|
||||
+ GLOBAL_DIRTY_LIMIT,
|
||||
+ false);
|
||||
+
|
||||
+ for (i = 0; i < stat.nvcpu; i++) {
|
||||
+ vcpu_dirty_rate_stat->stat.rates[i].id = i;
|
||||
+ vcpu_dirty_rate_stat->stat.rates[i].dirty_rate =
|
||||
+ stat.rates[i].dirty_rate;
|
||||
+ }
|
||||
+
|
||||
+ g_free(stat.rates); /* match the g_free() used for VcpuStat.rates in dirtyrate.c */
|
||||
+}
|
||||
+
|
||||
+static void *vcpu_dirty_rate_stat_thread(void *opaque)
|
||||
+{
|
||||
+ rcu_register_thread();
|
||||
+
|
||||
+ /* start log sync */
|
||||
+ global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true);
|
||||
+
|
||||
+ while (qatomic_read(&vcpu_dirty_rate_stat->running)) {
|
||||
+ vcpu_dirty_rate_stat_collect();
|
||||
+ }
|
||||
+
|
||||
+ /* stop log sync */
|
||||
+ global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false);
|
||||
+
|
||||
+ rcu_unregister_thread();
|
||||
+ return NULL;
|
||||
+}
|
||||
+
|
||||
+int64_t vcpu_dirty_rate_get(int cpu_index)
|
||||
+{
|
||||
+ DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates;
|
||||
+ return qatomic_read_i64(&rates[cpu_index].dirty_rate);
|
||||
+}
|
||||
+
|
||||
+void vcpu_dirty_rate_stat_start(void)
|
||||
+{
|
||||
+ if (qatomic_read(&vcpu_dirty_rate_stat->running)) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ qatomic_set(&vcpu_dirty_rate_stat->running, 1);
|
||||
+ qemu_thread_create(&vcpu_dirty_rate_stat->thread,
|
||||
+ "dirtyrate-stat",
|
||||
+ vcpu_dirty_rate_stat_thread,
|
||||
+ NULL,
|
||||
+ QEMU_THREAD_JOINABLE);
|
||||
+}
|
||||
+
|
||||
+void vcpu_dirty_rate_stat_stop(void)
|
||||
+{
|
||||
+ qatomic_set(&vcpu_dirty_rate_stat->running, 0);
|
||||
+ qemu_mutex_unlock_iothread();
|
||||
+ qemu_thread_join(&vcpu_dirty_rate_stat->thread);
|
||||
+ qemu_mutex_lock_iothread();
|
||||
+}
|
||||
+
|
||||
+void vcpu_dirty_rate_stat_initialize(void)
|
||||
+{
|
||||
+ MachineState *ms = MACHINE(qdev_get_machine());
|
||||
+ int max_cpus = ms->smp.max_cpus;
|
||||
+
|
||||
+ vcpu_dirty_rate_stat =
|
||||
+ g_malloc0(sizeof(*vcpu_dirty_rate_stat));
|
||||
+
|
||||
+ vcpu_dirty_rate_stat->stat.nvcpu = max_cpus;
|
||||
+ vcpu_dirty_rate_stat->stat.rates =
|
||||
+ g_malloc0(sizeof(DirtyRateVcpu) * max_cpus);
|
||||
+
|
||||
+ vcpu_dirty_rate_stat->running = false;
|
||||
+}
|
||||
+
|
||||
+void vcpu_dirty_rate_stat_finalize(void)
|
||||
+{
|
||||
+ g_free(vcpu_dirty_rate_stat->stat.rates); /* allocated with g_malloc0() */
|
||||
+ vcpu_dirty_rate_stat->stat.rates = NULL;
|
||||
+
|
||||
+ g_free(vcpu_dirty_rate_stat); /* allocated with g_malloc0() */
|
||||
+ vcpu_dirty_rate_stat = NULL;
|
||||
+}
|
||||
diff --git a/softmmu/meson.build b/softmmu/meson.build
|
||||
index d8e03018ab..95029a5db2 100644
|
||||
--- a/softmmu/meson.build
|
||||
+++ b/softmmu/meson.build
|
||||
@@ -15,6 +15,7 @@ specific_ss.add(when: 'CONFIG_SOFTMMU', if_true: [files(
|
||||
'vl.c',
|
||||
'cpu-timers.c',
|
||||
'runstate-action.c',
|
||||
+ 'dirtylimit.c',
|
||||
)])
|
||||
|
||||
specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: [files(
|
||||
--
|
||||
2.27.0
|
||||
|
||||
469
softmmu-dirtylimit-Implement-virtual-CPU-throttle.patch
Normal file
469
softmmu-dirtylimit-Implement-virtual-CPU-throttle.patch
Normal file
@ -0,0 +1,469 @@
|
||||
From 7b6ab56e68fb5031ea13b82743415413b1e70e71 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?=
|
||||
<huangy81@chinatelecom.cn>
|
||||
Date: Sun, 26 Jun 2022 01:38:35 +0800
|
||||
Subject: [PATCH 3/3] softmmu/dirtylimit: Implement virtual CPU throttle
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Set up a negative feedback system when the vCPU thread
|
||||
handles a KVM_EXIT_DIRTY_RING_FULL exit, by introducing
|
||||
the throttle_us_per_full field in struct CPUState. Sleep
|
||||
throttle_us_per_full microseconds to throttle the vCPU
|
||||
if dirtylimit is in service.
|
||||
|
||||
Signed-off-by: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
|
||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
||||
Message-Id: <977e808e03a1cef5151cae75984658b6821be618.1656177590.git.huangy81@chinatelecom.cn>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
---
|
||||
accel/kvm/kvm-all.c | 20 ++-
|
||||
include/hw/core/cpu.h | 6 +
|
||||
include/sysemu/dirtylimit.h | 15 ++
|
||||
softmmu/dirtylimit.c | 291 ++++++++++++++++++++++++++++++++++++
|
||||
softmmu/trace-events | 7 +
|
||||
5 files changed, 338 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||
index d0c4310507..946ccb260b 100644
|
||||
--- a/accel/kvm/kvm-all.c
|
||||
+++ b/accel/kvm/kvm-all.c
|
||||
@@ -45,6 +45,7 @@
|
||||
#include "qemu/guest-random.h"
|
||||
#include "sysemu/hw_accel.h"
|
||||
#include "kvm-cpus.h"
|
||||
+#include "sysemu/dirtylimit.h"
|
||||
|
||||
#include "hw/boards.h"
|
||||
|
||||
@@ -493,6 +494,7 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
|
||||
cpu->kvm_state = s;
|
||||
cpu->vcpu_dirty = true;
|
||||
cpu->dirty_pages = 0;
|
||||
+ cpu->throttle_us_per_full = 0;
|
||||
|
||||
mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
|
||||
if (mmap_size < 0) {
|
||||
@@ -1486,6 +1488,11 @@ static void *kvm_dirty_ring_reaper_thread(void *data)
|
||||
*/
|
||||
sleep(1);
|
||||
|
||||
+ /* keep sleeping so that dirtylimit not be interfered by reaper */
|
||||
+ if (dirtylimit_in_service()) {
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
trace_kvm_dirty_ring_reaper("wakeup");
|
||||
r->reaper_state = KVM_DIRTY_RING_REAPER_REAPING;
|
||||
|
||||
@@ -2965,8 +2972,19 @@ int kvm_cpu_exec(CPUState *cpu)
|
||||
*/
|
||||
trace_kvm_dirty_ring_full(cpu->cpu_index);
|
||||
qemu_mutex_lock_iothread();
|
||||
- kvm_dirty_ring_reap(kvm_state, NULL);
|
||||
+ /*
|
||||
+ * We throttle vCPU by making it sleep once it exit from kernel
|
||||
+ * due to dirty ring full. In the dirtylimit scenario, reaping
|
||||
+ * all vCPUs after a single vCPU dirty ring get full result in
|
||||
+ * the miss of sleep, so just reap the ring-fulled vCPU.
|
||||
+ */
|
||||
+ if (dirtylimit_in_service()) {
|
||||
+ kvm_dirty_ring_reap(kvm_state, cpu);
|
||||
+ } else {
|
||||
+ kvm_dirty_ring_reap(kvm_state, NULL);
|
||||
+ }
|
||||
qemu_mutex_unlock_iothread();
|
||||
+ dirtylimit_vcpu_execute(cpu);
|
||||
ret = 0;
|
||||
break;
|
||||
case KVM_EXIT_SYSTEM_EVENT:
|
||||
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
|
||||
index e948e81f1a..9631c1e2f6 100644
|
||||
--- a/include/hw/core/cpu.h
|
||||
+++ b/include/hw/core/cpu.h
|
||||
@@ -411,6 +411,12 @@ struct CPUState {
|
||||
*/
|
||||
bool throttle_thread_scheduled;
|
||||
|
||||
+ /*
|
||||
+ * Sleep throttle_us_per_full microseconds once dirty ring is full
|
||||
+ * if dirty page rate limit is enabled.
|
||||
+ */
|
||||
+ int64_t throttle_us_per_full;
|
||||
+
|
||||
bool ignore_memory_transaction_failures;
|
||||
|
||||
struct hax_vcpu_state *hax_vcpu;
|
||||
diff --git a/include/sysemu/dirtylimit.h b/include/sysemu/dirtylimit.h
|
||||
index da459f03d6..8d2c1f3a6b 100644
|
||||
--- a/include/sysemu/dirtylimit.h
|
||||
+++ b/include/sysemu/dirtylimit.h
|
||||
@@ -19,4 +19,19 @@ void vcpu_dirty_rate_stat_start(void);
|
||||
void vcpu_dirty_rate_stat_stop(void);
|
||||
void vcpu_dirty_rate_stat_initialize(void);
|
||||
void vcpu_dirty_rate_stat_finalize(void);
|
||||
+
|
||||
+void dirtylimit_state_lock(void);
|
||||
+void dirtylimit_state_unlock(void);
|
||||
+void dirtylimit_state_initialize(void);
|
||||
+void dirtylimit_state_finalize(void);
|
||||
+bool dirtylimit_in_service(void);
|
||||
+bool dirtylimit_vcpu_index_valid(int cpu_index);
|
||||
+void dirtylimit_process(void);
|
||||
+void dirtylimit_change(bool start);
|
||||
+void dirtylimit_set_vcpu(int cpu_index,
|
||||
+ uint64_t quota,
|
||||
+ bool enable);
|
||||
+void dirtylimit_set_all(uint64_t quota,
|
||||
+ bool enable);
|
||||
+void dirtylimit_vcpu_execute(CPUState *cpu);
|
||||
#endif
|
||||
diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
|
||||
index ebdc064c9d..e5a4f970bd 100644
|
||||
--- a/softmmu/dirtylimit.c
|
||||
+++ b/softmmu/dirtylimit.c
|
||||
@@ -18,6 +18,26 @@
|
||||
#include "sysemu/dirtylimit.h"
|
||||
#include "exec/memory.h"
|
||||
#include "hw/boards.h"
|
||||
+#include "sysemu/kvm.h"
|
||||
+#include "trace.h"
|
||||
+
|
||||
+/*
|
||||
+ * Dirtylimit stop working if dirty page rate error
|
||||
+ * value less than DIRTYLIMIT_TOLERANCE_RANGE
|
||||
+ */
|
||||
+#define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */
|
||||
+/*
|
||||
+ * Plus or minus vcpu sleep time linearly if dirty
|
||||
+ * page rate error value percentage over
|
||||
+ * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT.
|
||||
+ * Otherwise, plus or minus a fixed vcpu sleep time.
|
||||
+ */
|
||||
+#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT 50
|
||||
+/*
|
||||
+ * Max vcpu sleep time percentage during a cycle
|
||||
+ * composed of dirty ring full and sleep time.
|
||||
+ */
|
||||
+#define DIRTYLIMIT_THROTTLE_PCT_MAX 99
|
||||
|
||||
struct {
|
||||
VcpuStat stat;
|
||||
@@ -25,6 +45,30 @@ struct {
|
||||
QemuThread thread;
|
||||
} *vcpu_dirty_rate_stat;
|
||||
|
||||
+typedef struct VcpuDirtyLimitState {
|
||||
+ int cpu_index;
|
||||
+ bool enabled;
|
||||
+ /*
|
||||
+ * Quota dirty page rate, unit is MB/s
|
||||
+ * zero if not enabled.
|
||||
+ */
|
||||
+ uint64_t quota;
|
||||
+} VcpuDirtyLimitState;
|
||||
+
|
||||
+struct {
|
||||
+ VcpuDirtyLimitState *states;
|
||||
+ /* Max cpus number configured by user */
|
||||
+ int max_cpus;
|
||||
+ /* Number of vcpu under dirtylimit */
|
||||
+ int limited_nvcpu;
|
||||
+} *dirtylimit_state;
|
||||
+
|
||||
+/* protect dirtylimit_state */
|
||||
+static QemuMutex dirtylimit_mutex;
|
||||
+
|
||||
+/* dirtylimit thread quit if dirtylimit_quit is true */
|
||||
+static bool dirtylimit_quit;
|
||||
+
|
||||
static void vcpu_dirty_rate_stat_collect(void)
|
||||
{
|
||||
VcpuStat stat;
|
||||
@@ -54,6 +98,9 @@ static void *vcpu_dirty_rate_stat_thread(void *opaque)
|
||||
|
||||
while (qatomic_read(&vcpu_dirty_rate_stat->running)) {
|
||||
vcpu_dirty_rate_stat_collect();
|
||||
+ if (dirtylimit_in_service()) {
|
||||
+ dirtylimit_process();
|
||||
+ }
|
||||
}
|
||||
|
||||
/* stop log sync */
|
||||
@@ -86,9 +133,11 @@ void vcpu_dirty_rate_stat_start(void)
|
||||
void vcpu_dirty_rate_stat_stop(void)
|
||||
{
|
||||
qatomic_set(&vcpu_dirty_rate_stat->running, 0);
|
||||
+ dirtylimit_state_unlock();
|
||||
qemu_mutex_unlock_iothread();
|
||||
qemu_thread_join(&vcpu_dirty_rate_stat->thread);
|
||||
qemu_mutex_lock_iothread();
|
||||
+ dirtylimit_state_lock();
|
||||
}
|
||||
|
||||
void vcpu_dirty_rate_stat_initialize(void)
|
||||
@@ -114,3 +163,245 @@ void vcpu_dirty_rate_stat_finalize(void)
|
||||
free(vcpu_dirty_rate_stat);
|
||||
vcpu_dirty_rate_stat = NULL;
|
||||
}
|
||||
+
|
||||
+void dirtylimit_state_lock(void)
|
||||
+{
|
||||
+ qemu_mutex_lock(&dirtylimit_mutex);
|
||||
+}
|
||||
+
|
||||
+void dirtylimit_state_unlock(void)
|
||||
+{
|
||||
+ qemu_mutex_unlock(&dirtylimit_mutex);
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+__attribute__((__constructor__)) dirtylimit_mutex_init(void)
|
||||
+{
|
||||
+ qemu_mutex_init(&dirtylimit_mutex);
|
||||
+}
|
||||
+
|
||||
+static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index)
|
||||
+{
|
||||
+ return &dirtylimit_state->states[cpu_index];
|
||||
+}
|
||||
+
|
||||
+void dirtylimit_state_initialize(void)
|
||||
+{
|
||||
+ MachineState *ms = MACHINE(qdev_get_machine());
|
||||
+ int max_cpus = ms->smp.max_cpus;
|
||||
+ int i;
|
||||
+
|
||||
+ dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state));
|
||||
+
|
||||
+ dirtylimit_state->states =
|
||||
+ g_malloc0(sizeof(VcpuDirtyLimitState) * max_cpus);
|
||||
+
|
||||
+ for (i = 0; i < max_cpus; i++) {
|
||||
+ dirtylimit_state->states[i].cpu_index = i;
|
||||
+ }
|
||||
+
|
||||
+ dirtylimit_state->max_cpus = max_cpus;
|
||||
+ trace_dirtylimit_state_initialize(max_cpus);
|
||||
+}
|
||||
+
|
||||
+void dirtylimit_state_finalize(void)
|
||||
+{
|
||||
+ free(dirtylimit_state->states);
|
||||
+ dirtylimit_state->states = NULL;
|
||||
+
|
||||
+ free(dirtylimit_state);
|
||||
+ dirtylimit_state = NULL;
|
||||
+
|
||||
+ trace_dirtylimit_state_finalize();
|
||||
+}
|
||||
+
|
||||
+bool dirtylimit_in_service(void)
|
||||
+{
|
||||
+ return !!dirtylimit_state;
|
||||
+}
|
||||
+
|
||||
+bool dirtylimit_vcpu_index_valid(int cpu_index)
|
||||
+{
|
||||
+ MachineState *ms = MACHINE(qdev_get_machine());
|
||||
+
|
||||
+ return !(cpu_index < 0 ||
|
||||
+ cpu_index >= ms->smp.max_cpus);
|
||||
+}
|
||||
+
|
||||
+static inline int64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
|
||||
+{
|
||||
+ static uint64_t max_dirtyrate;
|
||||
+ uint32_t dirty_ring_size = kvm_dirty_ring_size();
|
||||
+ uint64_t dirty_ring_size_meory_MB =
|
||||
+ dirty_ring_size * TARGET_PAGE_SIZE >> 20;
|
||||
+
|
||||
+ if (max_dirtyrate < dirtyrate) {
|
||||
+ max_dirtyrate = dirtyrate;
|
||||
+ }
|
||||
+
|
||||
+ return dirty_ring_size_meory_MB * 1000000 / max_dirtyrate;
|
||||
+}
|
||||
+
|
||||
+static inline bool dirtylimit_done(uint64_t quota,
|
||||
+ uint64_t current)
|
||||
+{
|
||||
+ uint64_t min, max;
|
||||
+
|
||||
+ min = MIN(quota, current);
|
||||
+ max = MAX(quota, current);
|
||||
+
|
||||
+ return ((max - min) <= DIRTYLIMIT_TOLERANCE_RANGE) ? true : false;
|
||||
+}
|
||||
+
|
||||
+static inline bool
|
||||
+dirtylimit_need_linear_adjustment(uint64_t quota,
|
||||
+ uint64_t current)
|
||||
+{
|
||||
+ uint64_t min, max;
|
||||
+
|
||||
+ min = MIN(quota, current);
|
||||
+ max = MAX(quota, current);
|
||||
+
|
||||
+ return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT;
|
||||
+}
|
||||
+
|
||||
+static void dirtylimit_set_throttle(CPUState *cpu,
|
||||
+ uint64_t quota,
|
||||
+ uint64_t current)
|
||||
+{
|
||||
+ int64_t ring_full_time_us = 0;
|
||||
+ uint64_t sleep_pct = 0;
|
||||
+ uint64_t throttle_us = 0;
|
||||
+
|
||||
+ if (current == 0) {
|
||||
+ cpu->throttle_us_per_full = 0;
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ ring_full_time_us = dirtylimit_dirty_ring_full_time(current);
|
||||
+
|
||||
+ if (dirtylimit_need_linear_adjustment(quota, current)) {
|
||||
+ if (quota < current) {
|
||||
+ sleep_pct = (current - quota) * 100 / current;
|
||||
+ throttle_us =
|
||||
+ ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
|
||||
+ cpu->throttle_us_per_full += throttle_us;
|
||||
+ } else {
|
||||
+ sleep_pct = (quota - current) * 100 / quota;
|
||||
+ throttle_us =
|
||||
+ ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
|
||||
+ cpu->throttle_us_per_full -= throttle_us;
|
||||
+ }
|
||||
+
|
||||
+ trace_dirtylimit_throttle_pct(cpu->cpu_index,
|
||||
+ sleep_pct,
|
||||
+ throttle_us);
|
||||
+ } else {
|
||||
+ if (quota < current) {
|
||||
+ cpu->throttle_us_per_full += ring_full_time_us / 10;
|
||||
+ } else {
|
||||
+ cpu->throttle_us_per_full -= ring_full_time_us / 10;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario),
|
||||
+ * current dirty page rate may never reach the quota, we should stop
|
||||
+ * increasing sleep time?
|
||||
+ */
|
||||
+ cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full,
|
||||
+ ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX);
|
||||
+
|
||||
+ cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0);
|
||||
+}
|
||||
+
|
||||
+static void dirtylimit_adjust_throttle(CPUState *cpu)
|
||||
+{
|
||||
+ uint64_t quota = 0;
|
||||
+ uint64_t current = 0;
|
||||
+ int cpu_index = cpu->cpu_index;
|
||||
+
|
||||
+ quota = dirtylimit_vcpu_get_state(cpu_index)->quota;
|
||||
+ current = vcpu_dirty_rate_get(cpu_index);
|
||||
+
|
||||
+ if (!dirtylimit_done(quota, current)) {
|
||||
+ dirtylimit_set_throttle(cpu, quota, current);
|
||||
+ }
|
||||
+
|
||||
+ return;
|
||||
+}
|
||||
+
|
||||
+void dirtylimit_process(void)
|
||||
+{
|
||||
+ CPUState *cpu;
|
||||
+
|
||||
+ if (!qatomic_read(&dirtylimit_quit)) {
|
||||
+ dirtylimit_state_lock();
|
||||
+
|
||||
+ if (!dirtylimit_in_service()) {
|
||||
+ dirtylimit_state_unlock();
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ CPU_FOREACH(cpu) {
|
||||
+ if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
|
||||
+ continue;
|
||||
+ }
|
||||
+ dirtylimit_adjust_throttle(cpu);
|
||||
+ }
|
||||
+ dirtylimit_state_unlock();
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+void dirtylimit_change(bool start)
|
||||
+{
|
||||
+ if (start) {
|
||||
+ qatomic_set(&dirtylimit_quit, 0);
|
||||
+ } else {
|
||||
+ qatomic_set(&dirtylimit_quit, 1);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+void dirtylimit_set_vcpu(int cpu_index,
|
||||
+ uint64_t quota,
|
||||
+ bool enable)
|
||||
+{
|
||||
+ trace_dirtylimit_set_vcpu(cpu_index, quota);
|
||||
+
|
||||
+ if (enable) {
|
||||
+ dirtylimit_state->states[cpu_index].quota = quota;
|
||||
+ if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) {
|
||||
+ dirtylimit_state->limited_nvcpu++;
|
||||
+ }
|
||||
+ } else {
|
||||
+ dirtylimit_state->states[cpu_index].quota = 0;
|
||||
+ if (dirtylimit_state->states[cpu_index].enabled) {
|
||||
+ dirtylimit_state->limited_nvcpu--;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ dirtylimit_state->states[cpu_index].enabled = enable;
|
||||
+}
|
||||
+
|
||||
+void dirtylimit_set_all(uint64_t quota,
|
||||
+ bool enable)
|
||||
+{
|
||||
+ MachineState *ms = MACHINE(qdev_get_machine());
|
||||
+ int max_cpus = ms->smp.max_cpus;
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < max_cpus; i++) {
|
||||
+ dirtylimit_set_vcpu(i, quota, enable);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+void dirtylimit_vcpu_execute(CPUState *cpu)
|
||||
+{
|
||||
+ if (dirtylimit_in_service() &&
|
||||
+ dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled &&
|
||||
+ cpu->throttle_us_per_full) {
|
||||
+ trace_dirtylimit_vcpu_execute(cpu->cpu_index,
|
||||
+ cpu->throttle_us_per_full);
|
||||
+ usleep(cpu->throttle_us_per_full);
|
||||
+ }
|
||||
+}
|
||||
diff --git a/softmmu/trace-events b/softmmu/trace-events
|
||||
index 9c88887b3c..22606dc27b 100644
|
||||
--- a/softmmu/trace-events
|
||||
+++ b/softmmu/trace-events
|
||||
@@ -31,3 +31,10 @@ runstate_set(int current_state, const char *current_state_str, int new_state, co
|
||||
system_wakeup_request(int reason) "reason=%d"
|
||||
qemu_system_shutdown_request(int reason) "reason=%d"
|
||||
qemu_system_powerdown_request(void) ""
|
||||
+
|
||||
+#dirtylimit.c
|
||||
+dirtylimit_state_initialize(int max_cpus) "dirtylimit state initialize: max cpus %d"
|
||||
+dirtylimit_state_finalize(void)
|
||||
+dirtylimit_throttle_pct(int cpu_index, uint64_t pct, int64_t time_us) "CPU[%d] throttle percent: %" PRIu64 ", throttle adjust time %"PRIi64 " us"
|
||||
+dirtylimit_set_vcpu(int cpu_index, uint64_t quota) "CPU[%d] set dirty page rate limit %"PRIu64
|
||||
+dirtylimit_vcpu_execute(int cpu_index, int64_t sleep_time_us) "CPU[%d] sleep %"PRIi64 " us"
|
||||
--
|
||||
2.27.0
|
||||
|
||||
77
target-i386-kvm-do-not-access-uninitialized-variable.patch
Normal file
77
target-i386-kvm-do-not-access-uninitialized-variable.patch
Normal file
@ -0,0 +1,77 @@
|
||||
From 550d43a946b61bdadb418e0f8bef8b98e646276d Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Fri, 18 Mar 2022 16:23:47 +0100
|
||||
Subject: [PATCH 09/10] target/i386: kvm: do not access uninitialized variable
|
||||
on older kernels
|
||||
|
||||
from mainline-v7.0.0-rc1
|
||||
commit 3ec5ad40081b14af28496198b4d08dbe13386790
|
||||
category: feature
|
||||
feature: SPR AMX support for Qemu
|
||||
bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB
|
||||
|
||||
Intel-SIG: commit 3ec5ad40081b ("target/i386: kvm: do not access
|
||||
uninitialized variable on older kernels")
|
||||
|
||||
---------------------------------------------------------
|
||||
|
||||
target/i386: kvm: do not access uninitialized variable on older kernels
|
||||
|
||||
KVM support for AMX includes a new system attribute, KVM_X86_XCOMP_GUEST_SUPP.
|
||||
Commit 19db68ca68 ("x86: Grant AMX permission for guest", 2022-03-15) however
|
||||
did not fully consider the behavior on older kernels. First, it warns
|
||||
too aggressively. Second, it invokes the KVM_GET_DEVICE_ATTR ioctl
|
||||
unconditionally and then uses the "bitmask" variable, which remains
|
||||
uninitialized if the ioctl fails. Third, kvm_ioctl returns -errno rather
|
||||
than -1 on errors.
|
||||
|
||||
While at it, explain why the ioctl is needed and KVM_GET_SUPPORTED_CPUID
|
||||
is not enough.
|
||||
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
|
||||
---
|
||||
target/i386/kvm/kvm.c | 17 +++++++++++++----
|
||||
1 file changed, 13 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
|
||||
index 49fca5ea88..20e418463d 100644
|
||||
--- a/target/i386/kvm/kvm.c
|
||||
+++ b/target/i386/kvm/kvm.c
|
||||
@@ -409,6 +409,12 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
|
||||
}
|
||||
} else if (function == 0xd && index == 0 &&
|
||||
(reg == R_EAX || reg == R_EDX)) {
|
||||
+ /*
|
||||
+ * The value returned by KVM_GET_SUPPORTED_CPUID does not include
|
||||
+ * features that still have to be enabled with the arch_prctl
|
||||
+ * system call. QEMU needs the full value, which is retrieved
|
||||
+ * with KVM_GET_DEVICE_ATTR.
|
||||
+ */
|
||||
struct kvm_device_attr attr = {
|
||||
.group = 0,
|
||||
.attr = KVM_X86_XCOMP_GUEST_SUPP,
|
||||
@@ -417,13 +423,16 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
|
||||
|
||||
bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES);
|
||||
if (!sys_attr) {
|
||||
- warn_report("cannot get sys attribute capabilities %d", sys_attr);
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr);
|
||||
- if (rc == -1 && (errno == ENXIO || errno == EINVAL)) {
|
||||
- warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) "
|
||||
- "error: %d", rc);
|
||||
+ if (rc < 0) {
|
||||
+ if (rc != -ENXIO) {
|
||||
+ warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) "
|
||||
+ "error: %d", rc);
|
||||
+ }
|
||||
+ return ret;
|
||||
}
|
||||
ret = (reg == R_EAX) ? bitmask : bitmask >> 32;
|
||||
} else if (function == 0x80000001 && reg == R_ECX) {
|
||||
--
|
||||
2.27.0
|
||||
|
||||
362
tests-Add-dirty-page-rate-limit-test.patch
Normal file
362
tests-Add-dirty-page-rate-limit-test.patch
Normal file
@ -0,0 +1,362 @@
|
||||
From 8a0f4dcf94b280d5b7db7f604c42d088c928ac0d Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Hyman=20Huang=28=E9=BB=84=E5=8B=87=29?=
|
||||
<huangy81@chinatelecom.cn>
|
||||
Date: Sun, 26 Jun 2022 01:38:37 +0800
|
||||
Subject: [PATCH] tests: Add dirty page rate limit test
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Add a dirty page rate limit test if the kernel supports the dirty ring.
|
||||
|
||||
The following qmp commands are covered by this test case:
|
||||
"calc-dirty-rate", "query-dirty-rate", "set-vcpu-dirty-limit",
|
||||
"cancel-vcpu-dirty-limit" and "query-vcpu-dirty-limit".
|
||||
|
||||
Signed-off-by: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
|
||||
Acked-by: Peter Xu <peterx@redhat.com>
|
||||
Message-Id: <eed5b847a6ef0a9c02a36383dbdd7db367dd1e7e.1656177590.git.huangy81@chinatelecom.cn>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
---
|
||||
tests/qtest/migration-helpers.c | 22 +++
|
||||
tests/qtest/migration-helpers.h | 2 +
|
||||
tests/qtest/migration-test.c | 256 ++++++++++++++++++++++++++++++++
|
||||
3 files changed, 280 insertions(+)
|
||||
|
||||
diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
|
||||
index 4ee26014b7..1e594f9cb1 100644
|
||||
--- a/tests/qtest/migration-helpers.c
|
||||
+++ b/tests/qtest/migration-helpers.c
|
||||
@@ -75,6 +75,28 @@ QDict *wait_command(QTestState *who, const char *command, ...)
|
||||
return ret;
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Execute the qmp command only
|
||||
+ */
|
||||
+QDict *qmp_command(QTestState *who, const char *command, ...)
|
||||
+{
|
||||
+ va_list ap;
|
||||
+ QDict *resp, *ret;
|
||||
+
|
||||
+ va_start(ap, command);
|
||||
+ resp = qtest_vqmp(who, command, ap);
|
||||
+ va_end(ap);
|
||||
+
|
||||
+ g_assert(!qdict_haskey(resp, "error"));
|
||||
+ g_assert(qdict_haskey(resp, "return"));
|
||||
+
|
||||
+ ret = qdict_get_qdict(resp, "return");
|
||||
+ qobject_ref(ret);
|
||||
+ qobject_unref(resp);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Send QMP command "migrate".
|
||||
* Arguments are built from @fmt... (formatted like
|
||||
diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h
|
||||
index d63bba9630..9bc809fb75 100644
|
||||
--- a/tests/qtest/migration-helpers.h
|
||||
+++ b/tests/qtest/migration-helpers.h
|
||||
@@ -22,6 +22,8 @@ QDict *wait_command_fd(QTestState *who, int fd, const char *command, ...);
|
||||
GCC_FMT_ATTR(2, 3)
|
||||
QDict *wait_command(QTestState *who, const char *command, ...);
|
||||
|
||||
+QDict *qmp_command(QTestState *who, const char *command, ...);
|
||||
+
|
||||
GCC_FMT_ATTR(3, 4)
|
||||
void migrate_qmp(QTestState *who, const char *uri, const char *fmt, ...);
|
||||
|
||||
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
|
||||
index 7b42f6fd90..8fad247f6c 100644
|
||||
--- a/tests/qtest/migration-test.c
|
||||
+++ b/tests/qtest/migration-test.c
|
||||
@@ -23,6 +23,7 @@
|
||||
#include "qapi/qapi-visit-sockets.h"
|
||||
#include "qapi/qobject-input-visitor.h"
|
||||
#include "qapi/qobject-output-visitor.h"
|
||||
+#include "qapi/qmp/qlist.h"
|
||||
|
||||
#include "migration-helpers.h"
|
||||
#include "tests/migration/migration-test.h"
|
||||
@@ -42,6 +43,12 @@ static bool uffd_feature_thread_id;
|
||||
/* A downtime where the test really should converge */
|
||||
#define CONVERGE_DOWNTIME 1000
|
||||
|
||||
+/*
|
||||
+ * Dirtylimit stop working if dirty page rate error
|
||||
+ * value less than DIRTYLIMIT_TOLERANCE_RANGE
|
||||
+ */
|
||||
+#define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */
|
||||
+
|
||||
#if defined(__linux__)
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/vfs.h>
|
||||
@@ -1394,6 +1401,253 @@ static void test_multifd_tcp_cancel(void)
|
||||
test_migrate_end(from, to2, true);
|
||||
}
|
||||
|
||||
+static void calc_dirty_rate(QTestState *who, uint64_t calc_time)
|
||||
+{
|
||||
+ qobject_unref(qmp_command(who,
|
||||
+ "{ 'execute': 'calc-dirty-rate',"
|
||||
+ "'arguments': { "
|
||||
+ "'calc-time': %ld,"
|
||||
+ "'mode': 'dirty-ring' }}",
|
||||
+ calc_time));
|
||||
+}
|
||||
+
|
||||
+static QDict *query_dirty_rate(QTestState *who)
|
||||
+{
|
||||
+ return qmp_command(who, "{ 'execute': 'query-dirty-rate' }");
|
||||
+}
|
||||
+
|
||||
+static void dirtylimit_set_all(QTestState *who, uint64_t dirtyrate)
|
||||
+{
|
||||
+ qobject_unref(qmp_command(who,
|
||||
+ "{ 'execute': 'set-vcpu-dirty-limit',"
|
||||
+ "'arguments': { "
|
||||
+ "'dirty-rate': %ld } }",
|
||||
+ dirtyrate));
|
||||
+}
|
||||
+
|
||||
+static void cancel_vcpu_dirty_limit(QTestState *who)
|
||||
+{
|
||||
+ qobject_unref(qmp_command(who,
|
||||
+ "{ 'execute': 'cancel-vcpu-dirty-limit' }"));
|
||||
+}
|
||||
+
|
||||
+static QDict *query_vcpu_dirty_limit(QTestState *who)
|
||||
+{
|
||||
+ QDict *rsp;
|
||||
+
|
||||
+ rsp = qtest_qmp(who, "{ 'execute': 'query-vcpu-dirty-limit' }");
|
||||
+ g_assert(!qdict_haskey(rsp, "error"));
|
||||
+ g_assert(qdict_haskey(rsp, "return"));
|
||||
+
|
||||
+ return rsp;
|
||||
+}
|
||||
+
|
||||
+static bool calc_dirtyrate_ready(QTestState *who)
|
||||
+{
|
||||
+ QDict *rsp_return;
|
||||
+ gchar *status;
|
||||
+
|
||||
+ rsp_return = query_dirty_rate(who);
|
||||
+ g_assert(rsp_return);
|
||||
+
|
||||
+ status = g_strdup(qdict_get_str(rsp_return, "status"));
|
||||
+ g_assert(status);
|
||||
+
|
||||
+ return g_strcmp0(status, "measuring");
|
||||
+}
|
||||
+
|
||||
+static void wait_for_calc_dirtyrate_complete(QTestState *who,
|
||||
+ int64_t time_s)
|
||||
+{
|
||||
+ int max_try_count = 10000;
|
||||
+ usleep(time_s * 1000000);
|
||||
+
|
||||
+ while (!calc_dirtyrate_ready(who) && max_try_count--) {
|
||||
+ usleep(1000);
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * Set the timeout with 10 s(max_try_count * 1000us),
|
||||
+ * if dirtyrate measurement not complete, fail test.
|
||||
+ */
|
||||
+ g_assert_cmpint(max_try_count, !=, 0);
|
||||
+}
|
||||
+
|
||||
+static int64_t get_dirty_rate(QTestState *who)
|
||||
+{
|
||||
+ QDict *rsp_return;
|
||||
+ gchar *status;
|
||||
+ QList *rates;
|
||||
+ const QListEntry *entry;
|
||||
+ QDict *rate;
|
||||
+ int64_t dirtyrate;
|
||||
+
|
||||
+ rsp_return = query_dirty_rate(who);
|
||||
+ g_assert(rsp_return);
|
||||
+
|
||||
+ status = g_strdup(qdict_get_str(rsp_return, "status"));
|
||||
+ g_assert(status);
|
||||
+ g_assert_cmpstr(status, ==, "measured");
|
||||
+
|
||||
+ rates = qdict_get_qlist(rsp_return, "vcpu-dirty-rate");
|
||||
+ g_assert(rates && !qlist_empty(rates));
|
||||
+
|
||||
+ entry = qlist_first(rates);
|
||||
+ g_assert(entry);
|
||||
+
|
||||
+ rate = qobject_to(QDict, qlist_entry_obj(entry));
|
||||
+ g_assert(rate);
|
||||
+
|
||||
+ dirtyrate = qdict_get_try_int(rate, "dirty-rate", -1);
|
||||
+
|
||||
+ qobject_unref(rsp_return);
|
||||
+ return dirtyrate;
|
||||
+}
|
||||
+
|
||||
+static int64_t get_limit_rate(QTestState *who)
|
||||
+{
|
||||
+ QDict *rsp_return;
|
||||
+ QList *rates;
|
||||
+ const QListEntry *entry;
|
||||
+ QDict *rate;
|
||||
+ int64_t dirtyrate;
|
||||
+
|
||||
+ rsp_return = query_vcpu_dirty_limit(who);
|
||||
+ g_assert(rsp_return);
|
||||
+
|
||||
+ rates = qdict_get_qlist(rsp_return, "return");
|
||||
+ g_assert(rates && !qlist_empty(rates));
|
||||
+
|
||||
+ entry = qlist_first(rates);
|
||||
+ g_assert(entry);
|
||||
+
|
||||
+ rate = qobject_to(QDict, qlist_entry_obj(entry));
|
||||
+ g_assert(rate);
|
||||
+
|
||||
+ dirtyrate = qdict_get_try_int(rate, "limit-rate", -1);
|
||||
+
|
||||
+ qobject_unref(rsp_return);
|
||||
+ return dirtyrate;
|
||||
+}
|
||||
+
|
||||
+static QTestState *dirtylimit_start_vm(void)
|
||||
+{
|
||||
+ QTestState *vm = NULL;
|
||||
+ g_autofree gchar *cmd = NULL;
|
||||
+ const char *arch = qtest_get_arch();
|
||||
+ g_autofree char *bootpath = NULL;
|
||||
+
|
||||
+ assert((strcmp(arch, "x86_64") == 0));
|
||||
+ bootpath = g_strdup_printf("%s/bootsect", tmpfs);
|
||||
+ assert(sizeof(x86_bootsect) == 512);
|
||||
+ init_bootfile(bootpath, x86_bootsect, sizeof(x86_bootsect));
|
||||
+
|
||||
+ cmd = g_strdup_printf("-accel kvm,dirty-ring-size=4096 "
|
||||
+ "-name dirtylimit-test,debug-threads=on "
|
||||
+ "-m 150M -smp 1 "
|
||||
+ "-serial file:%s/vm_serial "
|
||||
+ "-drive file=%s,format=raw ",
|
||||
+ tmpfs, bootpath);
|
||||
+
|
||||
+ vm = qtest_init(cmd);
|
||||
+ return vm;
|
||||
+}
|
||||
+
|
||||
+static void dirtylimit_stop_vm(QTestState *vm)
|
||||
+{
|
||||
+ qtest_quit(vm);
|
||||
+ cleanup("bootsect");
|
||||
+ cleanup("vm_serial");
|
||||
+}
|
||||
+
|
||||
+static void test_vcpu_dirty_limit(void)
|
||||
+{
|
||||
+ QTestState *vm;
|
||||
+ int64_t origin_rate;
|
||||
+ int64_t quota_rate;
|
||||
+ int64_t rate ;
|
||||
+ int max_try_count = 20;
|
||||
+ int hit = 0;
|
||||
+
|
||||
+ /* Start vm for vcpu dirtylimit test */
|
||||
+ vm = dirtylimit_start_vm();
|
||||
+
|
||||
+ /* Wait for the first serial output from the vm*/
|
||||
+ wait_for_serial("vm_serial");
|
||||
+
|
||||
+ /* Do dirtyrate measurement with calc time equals 1s */
|
||||
+ calc_dirty_rate(vm, 1);
|
||||
+
|
||||
+ /* Sleep calc time and wait for calc dirtyrate complete */
|
||||
+ wait_for_calc_dirtyrate_complete(vm, 1);
|
||||
+
|
||||
+ /* Query original dirty page rate */
|
||||
+ origin_rate = get_dirty_rate(vm);
|
||||
+
|
||||
+ /* VM booted from bootsect should dirty memory steadily */
|
||||
+ assert(origin_rate != 0);
|
||||
+
|
||||
+ /* Setup quota dirty page rate at half of origin */
|
||||
+ quota_rate = origin_rate / 2;
|
||||
+
|
||||
+ /* Set dirtylimit */
|
||||
+ dirtylimit_set_all(vm, quota_rate);
|
||||
+
|
||||
+ /*
|
||||
+ * Check if set-vcpu-dirty-limit and query-vcpu-dirty-limit
|
||||
+ * works literally
|
||||
+ */
|
||||
+ g_assert_cmpint(quota_rate, ==, get_limit_rate(vm));
|
||||
+
|
||||
+ /* Sleep a bit to check if it take effect */
|
||||
+ usleep(2000000);
|
||||
+
|
||||
+ /*
|
||||
+ * Check if dirtylimit take effect realistically, set the
|
||||
+ * timeout with 20 s(max_try_count * 1s), if dirtylimit
|
||||
+ * doesn't take effect, fail test.
|
||||
+ */
|
||||
+ while (--max_try_count) {
|
||||
+ calc_dirty_rate(vm, 1);
|
||||
+ wait_for_calc_dirtyrate_complete(vm, 1);
|
||||
+ rate = get_dirty_rate(vm);
|
||||
+
|
||||
+ /*
|
||||
+ * Assume hitting if current rate is less
|
||||
+ * than quota rate (within accepting error)
|
||||
+ */
|
||||
+ if (rate < (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) {
|
||||
+ hit = 1;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ g_assert_cmpint(hit, ==, 1);
|
||||
+
|
||||
+ hit = 0;
|
||||
+ max_try_count = 20;
|
||||
+
|
||||
+ /* Check if dirtylimit cancellation take effect */
|
||||
+ cancel_vcpu_dirty_limit(vm);
|
||||
+ while (--max_try_count) {
|
||||
+ calc_dirty_rate(vm, 1);
|
||||
+ wait_for_calc_dirtyrate_complete(vm, 1);
|
||||
+ rate = get_dirty_rate(vm);
|
||||
+
|
||||
+ /*
|
||||
+ * Assume dirtylimit be canceled if current rate is
|
||||
+ * greater than quota rate (within accepting error)
|
||||
+ */
|
||||
+ if (rate > (quota_rate + DIRTYLIMIT_TOLERANCE_RANGE)) {
|
||||
+ hit = 1;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ g_assert_cmpint(hit, ==, 1);
|
||||
+ dirtylimit_stop_vm(vm);
|
||||
+}
|
||||
+
|
||||
static bool kvm_dirty_ring_supported(void)
|
||||
{
|
||||
#if defined(__linux__) && defined(HOST_X86_64)
|
||||
@@ -1483,6 +1737,8 @@ int main(int argc, char **argv)
|
||||
if (kvm_dirty_ring_supported()) {
|
||||
qtest_add_func("/migration/dirty_ring",
|
||||
test_precopy_unix_dirty_ring);
|
||||
+ qtest_add_func("/migration/vcpu_dirty_limit",
|
||||
+ test_vcpu_dirty_limit);
|
||||
}
|
||||
|
||||
ret = g_test_run();
|
||||
--
|
||||
2.27.0
|
||||
|
||||
138
x86-Add-AMX-CPUIDs-enumeration.patch
Normal file
138
x86-Add-AMX-CPUIDs-enumeration.patch
Normal file
@ -0,0 +1,138 @@
|
||||
From 42f96b9e73ff4a23fad56bc8fefea5e477ee95b9 Mon Sep 17 00:00:00 2001
|
||||
From: Jing Liu <jing2.liu@intel.com>
|
||||
Date: Wed, 16 Feb 2022 22:04:31 -0800
|
||||
Subject: [PATCH 06/10] x86: Add AMX CPUIDs enumeration
|
||||
|
||||
from mainline-v7.0.0-rc0
|
||||
commit f21a48171cf3fa39532fc8553fd82e81b88b6474
|
||||
category: feature
|
||||
feature: SPR AMX support for Qemu
|
||||
bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB
|
||||
|
||||
Intel-SIG: commit f21a48171cf3 ("x86: Add AMX CPUIDs enumeration")
|
||||
|
||||
----------------------------------------------
|
||||
|
||||
x86: Add AMX CPUIDs enumeration
|
||||
|
||||
Add AMX primary feature bits XFD and AMX_TILE to
|
||||
enumerate the CPU's AMX capability. Meanwhile, add
|
||||
AMX TILE and TMUL CPUID leaf and subleaves which
|
||||
exist when AMX TILE is present to provide the maximum
|
||||
capability of TILE and TMUL.
|
||||
|
||||
Signed-off-by: Jing Liu <jing2.liu@intel.com>
|
||||
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
|
||||
Message-Id: <20220217060434.52460-6-yang.zhong@intel.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
|
||||
---
|
||||
target/i386/cpu.c | 55 ++++++++++++++++++++++++++++++++++++++++---
|
||||
target/i386/kvm/kvm.c | 4 +++-
|
||||
2 files changed, 55 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
|
||||
index da81e47dc3..1bc03d3eef 100644
|
||||
--- a/target/i386/cpu.c
|
||||
+++ b/target/i386/cpu.c
|
||||
@@ -574,6 +574,18 @@ static CPUCacheInfo legacy_l3_cache = {
|
||||
#define INTEL_PT_CYCLE_BITMAP 0x1fff /* Support 0,2^(0~11) */
|
||||
#define INTEL_PT_PSB_BITMAP (0x003f << 16) /* Support 2K,4K,8K,16K,32K,64K */
|
||||
|
||||
+/* CPUID Leaf 0x1D constants: */
|
||||
+#define INTEL_AMX_TILE_MAX_SUBLEAF 0x1
|
||||
+#define INTEL_AMX_TOTAL_TILE_BYTES 0x2000
|
||||
+#define INTEL_AMX_BYTES_PER_TILE 0x400
|
||||
+#define INTEL_AMX_BYTES_PER_ROW 0x40
|
||||
+#define INTEL_AMX_TILE_MAX_NAMES 0x8
|
||||
+#define INTEL_AMX_TILE_MAX_ROWS 0x10
|
||||
+
|
||||
+/* CPUID Leaf 0x1E constants: */
|
||||
+#define INTEL_AMX_TMUL_MAX_K 0x10
|
||||
+#define INTEL_AMX_TMUL_MAX_N 0x40
|
||||
+
|
||||
void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
|
||||
uint32_t vendor2, uint32_t vendor3)
|
||||
{
|
||||
@@ -843,8 +855,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
|
||||
"avx512-vp2intersect", NULL, "md-clear", NULL,
|
||||
NULL, NULL, "serialize", NULL,
|
||||
"tsx-ldtrk", NULL, NULL /* pconfig */, NULL,
|
||||
- NULL, NULL, NULL, "avx512-fp16",
|
||||
- NULL, NULL, "spec-ctrl", "stibp",
|
||||
+ NULL, NULL, "amx-bf16", "avx512-fp16",
|
||||
+ "amx-tile", "amx-int8", "spec-ctrl", "stibp",
|
||||
NULL, "arch-capabilities", "core-capability", "ssbd",
|
||||
},
|
||||
.cpuid = {
|
||||
@@ -909,7 +921,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
|
||||
.type = CPUID_FEATURE_WORD,
|
||||
.feat_names = {
|
||||
"xsaveopt", "xsavec", "xgetbv1", "xsaves",
|
||||
- NULL, NULL, NULL, NULL,
|
||||
+ "xfd", NULL, NULL, NULL,
|
||||
NULL, NULL, NULL, NULL,
|
||||
NULL, NULL, NULL, NULL,
|
||||
NULL, NULL, NULL, NULL,
|
||||
@@ -5605,6 +5617,43 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
|
||||
}
|
||||
break;
|
||||
}
|
||||
+ case 0x1D: {
|
||||
+ /* AMX TILE */
|
||||
+ *eax = 0;
|
||||
+ *ebx = 0;
|
||||
+ *ecx = 0;
|
||||
+ *edx = 0;
|
||||
+ if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) {
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ if (count == 0) {
|
||||
+ /* Highest numbered palette subleaf */
|
||||
+ *eax = INTEL_AMX_TILE_MAX_SUBLEAF;
|
||||
+ } else if (count == 1) {
|
||||
+ *eax = INTEL_AMX_TOTAL_TILE_BYTES |
|
||||
+ (INTEL_AMX_BYTES_PER_TILE << 16);
|
||||
+ *ebx = INTEL_AMX_BYTES_PER_ROW | (INTEL_AMX_TILE_MAX_NAMES << 16);
|
||||
+ *ecx = INTEL_AMX_TILE_MAX_ROWS;
|
||||
+ }
|
||||
+ break;
|
||||
+ }
|
||||
+ case 0x1E: {
|
||||
+ /* AMX TMUL */
|
||||
+ *eax = 0;
|
||||
+ *ebx = 0;
|
||||
+ *ecx = 0;
|
||||
+ *edx = 0;
|
||||
+ if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) {
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ if (count == 0) {
|
||||
+ /* TMUL information: maximum K and N */
|
||||
+ *ebx = INTEL_AMX_TMUL_MAX_K | (INTEL_AMX_TMUL_MAX_N << 8);
|
||||
+ }
|
||||
+ break;
|
||||
+ }
|
||||
case 0x40000000:
|
||||
/*
|
||||
* CPUID code in kvm_arch_init_vcpu() ignores stuff
|
||||
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
|
||||
index e7f57d05a2..60ccdec5e8 100644
|
||||
--- a/target/i386/kvm/kvm.c
|
||||
+++ b/target/i386/kvm/kvm.c
|
||||
@@ -1779,7 +1779,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
c = &cpuid_data.entries[cpuid_i++];
|
||||
}
|
||||
break;
|
||||
- case 0x14: {
|
||||
+ case 0x14:
|
||||
+ case 0x1d:
|
||||
+ case 0x1e: {
|
||||
uint32_t times;
|
||||
|
||||
c->function = i;
|
||||
--
|
||||
2.27.0
|
||||
|
||||
115
x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch
Normal file
115
x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch
Normal file
@ -0,0 +1,115 @@
|
||||
From 98f5dbc3fd8390728401528786ac94b39f0581ee Mon Sep 17 00:00:00 2001
|
||||
From: Jing Liu <jing2.liu@intel.com>
|
||||
Date: Wed, 16 Feb 2022 22:04:28 -0800
|
||||
Subject: [PATCH 03/10] x86: Add AMX XTILECFG and XTILEDATA components
|
||||
|
||||
from mainline-v7.0.0-rc0
|
||||
commit 1f16764f7d4515bfd5e4ae0aae814fa280a7d0c8
|
||||
category: feature
|
||||
feature: SPR AMX support for Qemu
|
||||
bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB
|
||||
|
||||
Intel-SIG: commit 1f16764f7d45 ("x86: Add AMX XTILECFG and XTILEDATA components")
|
||||
|
||||
-------------------------------------------------------------
|
||||
|
||||
x86: Add AMX XTILECFG and XTILEDATA components
|
||||
|
||||
The AMX TILECFG register and the TMMx tile data registers are
|
||||
saved/restored via XSAVE, respectively in state component 17
|
||||
(64 bytes) and state component 18 (8192 bytes).
|
||||
|
||||
Add AMX feature bits to x86_ext_save_areas array to set
|
||||
up AMX components. Add structs that define the layout of
|
||||
AMX XSAVE areas and use QEMU_BUILD_BUG_ON to validate the
|
||||
structs sizes.
|
||||
|
||||
Signed-off-by: Jing Liu <jing2.liu@intel.com>
|
||||
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
|
||||
Message-Id: <20220217060434.52460-3-yang.zhong@intel.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
|
||||
---
|
||||
target/i386/cpu.c | 8 ++++++++
|
||||
target/i386/cpu.h | 18 +++++++++++++++++-
|
||||
2 files changed, 25 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
|
||||
index 532ca45015..31d63be081 100644
|
||||
--- a/target/i386/cpu.c
|
||||
+++ b/target/i386/cpu.c
|
||||
@@ -1401,6 +1401,14 @@ ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = {
|
||||
[XSTATE_PKRU_BIT] =
|
||||
{ .feature = FEAT_7_0_ECX, .bits = CPUID_7_0_ECX_PKU,
|
||||
.size = sizeof(XSavePKRU) },
|
||||
+ [XSTATE_XTILE_CFG_BIT] = {
|
||||
+ .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE,
|
||||
+ .size = sizeof(XSaveXTILECFG),
|
||||
+ },
|
||||
+ [XSTATE_XTILE_DATA_BIT] = {
|
||||
+ .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE,
|
||||
+ .size = sizeof(XSaveXTILEDATA)
|
||||
+ },
|
||||
};
|
||||
|
||||
static uint32_t xsave_area_size(uint64_t mask)
|
||||
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
|
||||
index 52330d1112..cc431b1d76 100644
|
||||
--- a/target/i386/cpu.h
|
||||
+++ b/target/i386/cpu.h
|
||||
@@ -538,6 +538,8 @@ typedef enum X86Seg {
|
||||
#define XSTATE_ZMM_Hi256_BIT 6
|
||||
#define XSTATE_Hi16_ZMM_BIT 7
|
||||
#define XSTATE_PKRU_BIT 9
|
||||
+#define XSTATE_XTILE_CFG_BIT 17
|
||||
+#define XSTATE_XTILE_DATA_BIT 18
|
||||
|
||||
#define XSTATE_FP_MASK (1ULL << XSTATE_FP_BIT)
|
||||
#define XSTATE_SSE_MASK (1ULL << XSTATE_SSE_BIT)
|
||||
@@ -846,6 +848,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
|
||||
#define CPUID_7_0_EDX_TSX_LDTRK (1U << 16)
|
||||
/* AVX512_FP16 instruction */
|
||||
#define CPUID_7_0_EDX_AVX512_FP16 (1U << 23)
|
||||
+/* AMX tile (two-dimensional register) */
|
||||
+#define CPUID_7_0_EDX_AMX_TILE (1U << 24)
|
||||
/* Speculation Control */
|
||||
#define CPUID_7_0_EDX_SPEC_CTRL (1U << 26)
|
||||
/* Single Thread Indirect Branch Predictors */
|
||||
@@ -1349,6 +1353,16 @@ typedef struct XSavePKRU {
|
||||
uint32_t padding;
|
||||
} XSavePKRU;
|
||||
|
||||
+/* Ext. save area 17: AMX XTILECFG state */
|
||||
+typedef struct XSaveXTILECFG {
|
||||
+ uint8_t xtilecfg[64];
|
||||
+} XSaveXTILECFG;
|
||||
+
|
||||
+/* Ext. save area 18: AMX XTILEDATA state */
|
||||
+typedef struct XSaveXTILEDATA {
|
||||
+ uint8_t xtiledata[8][1024];
|
||||
+} XSaveXTILEDATA;
|
||||
+
|
||||
QEMU_BUILD_BUG_ON(sizeof(XSaveAVX) != 0x100);
|
||||
QEMU_BUILD_BUG_ON(sizeof(XSaveBNDREG) != 0x40);
|
||||
QEMU_BUILD_BUG_ON(sizeof(XSaveBNDCSR) != 0x40);
|
||||
@@ -1356,6 +1370,8 @@ QEMU_BUILD_BUG_ON(sizeof(XSaveOpmask) != 0x40);
|
||||
QEMU_BUILD_BUG_ON(sizeof(XSaveZMM_Hi256) != 0x200);
|
||||
QEMU_BUILD_BUG_ON(sizeof(XSaveHi16_ZMM) != 0x400);
|
||||
QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8);
|
||||
+QEMU_BUILD_BUG_ON(sizeof(XSaveXTILECFG) != 0x40);
|
||||
+QEMU_BUILD_BUG_ON(sizeof(XSaveXTILEDATA) != 0x2000);
|
||||
|
||||
typedef struct ExtSaveArea {
|
||||
uint32_t feature, bits;
|
||||
@@ -1363,7 +1379,7 @@ typedef struct ExtSaveArea {
|
||||
uint32_t ecx;
|
||||
} ExtSaveArea;
|
||||
|
||||
-#define XSAVE_STATE_AREA_COUNT (XSTATE_PKRU_BIT + 1)
|
||||
+#define XSAVE_STATE_AREA_COUNT (XSTATE_XTILE_DATA_BIT + 1)
|
||||
|
||||
extern ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT];
|
||||
|
||||
--
|
||||
2.27.0
|
||||
|
||||
66
x86-Add-XFD-faulting-bit-for-state-components.patch
Normal file
66
x86-Add-XFD-faulting-bit-for-state-components.patch
Normal file
@ -0,0 +1,66 @@
|
||||
From 52eed626a2200da02e67aa93c2a8d59cb529737b Mon Sep 17 00:00:00 2001
|
||||
From: Jing Liu <jing2.liu@intel.com>
|
||||
Date: Wed, 16 Feb 2022 22:04:30 -0800
|
||||
Subject: [PATCH 05/10] x86: Add XFD faulting bit for state components
|
||||
|
||||
from mainline-v7.0.0-rc0
|
||||
commit 0f17f6b30f3b051f0f96ccc98c9f7f395713699f
|
||||
category: feature
|
||||
feature: SPR AMX support for Qemu
|
||||
bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB
|
||||
|
||||
Intel-SIG: commit 0f17f6b30f3b ("x86: Add XFD faulting bit for state
|
||||
components")
|
||||
|
||||
-------------------------------------------------
|
||||
|
||||
x86: Add XFD faulting bit for state components
|
||||
|
||||
Intel introduces XFD faulting mechanism for extended
|
||||
XSAVE features to dynamically enable the features in
|
||||
runtime. If CPUID (EAX=0Dh, ECX=n, n>1).ECX[2] is set
|
||||
as 1, it indicates support for XFD faulting of this
|
||||
state component.
|
||||
|
||||
Signed-off-by: Jing Liu <jing2.liu@intel.com>
|
||||
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
|
||||
Message-Id: <20220217060434.52460-5-yang.zhong@intel.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
|
||||
---
|
||||
target/i386/cpu.c | 3 ++-
|
||||
target/i386/cpu.h | 2 ++
|
||||
2 files changed, 4 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
|
||||
index fb6b4c86de..da81e47dc3 100644
|
||||
--- a/target/i386/cpu.c
|
||||
+++ b/target/i386/cpu.c
|
||||
@@ -5515,7 +5515,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
|
||||
const ExtSaveArea *esa = &x86_ext_save_areas[count];
|
||||
*eax = esa->size;
|
||||
*ebx = esa->offset;
|
||||
- *ecx = esa->ecx & ESA_FEATURE_ALIGN64_MASK;
|
||||
+ *ecx = esa->ecx &
|
||||
+ (ESA_FEATURE_ALIGN64_MASK | ESA_FEATURE_XFD_MASK);
|
||||
}
|
||||
}
|
||||
break;
|
||||
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
|
||||
index 93d1c60ac1..09c725ee13 100644
|
||||
--- a/target/i386/cpu.h
|
||||
+++ b/target/i386/cpu.h
|
||||
@@ -556,8 +556,10 @@ typedef enum X86Seg {
|
||||
#define XSTATE_DYNAMIC_MASK (XSTATE_XTILE_DATA_MASK)
|
||||
|
||||
#define ESA_FEATURE_ALIGN64_BIT 1
|
||||
+#define ESA_FEATURE_XFD_BIT 2
|
||||
|
||||
#define ESA_FEATURE_ALIGN64_MASK (1U << ESA_FEATURE_ALIGN64_BIT)
|
||||
+#define ESA_FEATURE_XFD_MASK (1U << ESA_FEATURE_XFD_BIT)
|
||||
|
||||
|
||||
/* CPUID feature words */
|
||||
--
|
||||
2.27.0
|
||||
|
||||
91
x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch
Normal file
91
x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch
Normal file
@ -0,0 +1,91 @@
|
||||
From ab183c656a2bee466e7c609224cddb75b80d9d6f Mon Sep 17 00:00:00 2001
|
||||
From: Jing Liu <jing2.liu@intel.com>
|
||||
Date: Wed, 16 Feb 2022 22:04:27 -0800
|
||||
Subject: [PATCH 02/10] x86: Fix the 64-byte boundary enumeration for extended
|
||||
state
|
||||
|
||||
from mainline-v7.0.0-rc0
|
||||
commit 131266b7565bd437127bd231563572696bb27235
|
||||
category: feature
|
||||
feature: SPR AMX support for Qemu
|
||||
bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB
|
||||
|
||||
Intel-SIG: commit 131266b7565b ("x86: Fix the 64-byte boundary enumeration for extended state")
|
||||
|
||||
-----------------------------------------------------------
|
||||
|
||||
x86: Fix the 64-byte boundary enumeration for extended state
|
||||
|
||||
The extended state subleaves (EAX=0Dh, ECX=n, n>1).ECX[1]
|
||||
indicate whether the extended state component is located
|
||||
on the next 64-byte boundary following the preceding state
|
||||
component when the compacted format of an XSAVE area is
|
||||
used.
|
||||
|
||||
Right now, they are all zero because no supported component
|
||||
needed the bit to be set, but the upcoming AMX feature will
|
||||
use it. Fix the subleaves value according to KVM's supported
|
||||
cpuid.
|
||||
|
||||
Signed-off-by: Jing Liu <jing2.liu@intel.com>
|
||||
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
|
||||
Message-Id: <20220217060434.52460-2-yang.zhong@intel.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
|
||||
---
|
||||
target/i386/cpu.c | 1 +
|
||||
target/i386/cpu.h | 6 ++++++
|
||||
target/i386/kvm/kvm-cpu.c | 1 +
|
||||
3 files changed, 8 insertions(+)
|
||||
|
||||
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
|
||||
index d9dca1dafb..532ca45015 100644
|
||||
--- a/target/i386/cpu.c
|
||||
+++ b/target/i386/cpu.c
|
||||
@@ -5507,6 +5507,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
|
||||
const ExtSaveArea *esa = &x86_ext_save_areas[count];
|
||||
*eax = esa->size;
|
||||
*ebx = esa->offset;
|
||||
+ *ecx = esa->ecx & ESA_FEATURE_ALIGN64_MASK;
|
||||
}
|
||||
}
|
||||
break;
|
||||
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
|
||||
index d9296a9abc..52330d1112 100644
|
||||
--- a/target/i386/cpu.h
|
||||
+++ b/target/i386/cpu.h
|
||||
@@ -549,6 +549,11 @@ typedef enum X86Seg {
|
||||
#define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT)
|
||||
#define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT)
|
||||
|
||||
+#define ESA_FEATURE_ALIGN64_BIT 1
|
||||
+
|
||||
+#define ESA_FEATURE_ALIGN64_MASK (1U << ESA_FEATURE_ALIGN64_BIT)
|
||||
+
|
||||
+
|
||||
/* CPUID feature words */
|
||||
typedef enum FeatureWord {
|
||||
FEAT_1_EDX, /* CPUID[1].EDX */
|
||||
@@ -1355,6 +1360,7 @@ QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8);
|
||||
typedef struct ExtSaveArea {
|
||||
uint32_t feature, bits;
|
||||
uint32_t offset, size;
|
||||
+ uint32_t ecx;
|
||||
} ExtSaveArea;
|
||||
|
||||
#define XSAVE_STATE_AREA_COUNT (XSTATE_PKRU_BIT + 1)
|
||||
diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
|
||||
index d95028018e..ce27d3b1df 100644
|
||||
--- a/target/i386/kvm/kvm-cpu.c
|
||||
+++ b/target/i386/kvm/kvm-cpu.c
|
||||
@@ -104,6 +104,7 @@ static void kvm_cpu_xsave_init(void)
|
||||
if (sz != 0) {
|
||||
assert(esa->size == sz);
|
||||
esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX);
|
||||
+ esa->ecx = kvm_arch_get_supported_cpuid(s, 0xd, i, R_ECX);
|
||||
}
|
||||
}
|
||||
}
|
||||
--
|
||||
2.27.0
|
||||
|
||||
218
x86-Grant-AMX-permission-for-guest.patch
Normal file
218
x86-Grant-AMX-permission-for-guest.patch
Normal file
@ -0,0 +1,218 @@
|
||||
From b7e588a4506ce61c13e78175c2da5b69b60af128 Mon Sep 17 00:00:00 2001
|
||||
From: Yang Zhong <yang.zhong@intel.com>
|
||||
Date: Wed, 16 Feb 2022 22:04:29 -0800
|
||||
Subject: [PATCH 04/10] x86: Grant AMX permission for guest
|
||||
|
||||
from mainline-v7.0.0-rc0
|
||||
commit 19db68ca68a78fa033a21d419036b6e416554564
|
||||
category: feature
|
||||
feature: SPR AMX support for Qemu
|
||||
bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB
|
||||
|
||||
Intel-SIG: commit 19db68ca68a7 ("x86: Grant AMX permission for guest")
|
||||
|
||||
--------------------------------------------------------
|
||||
|
||||
x86: Grant AMX permission for guest
|
||||
|
||||
Kernel allocates 4K xstate buffer by default. For XSAVE features
|
||||
which require large state component (e.g. AMX), Linux kernel
|
||||
dynamically expands the xstate buffer only after the process has
|
||||
acquired the necessary permissions. Those are called dynamically-
|
||||
enabled XSAVE features (or dynamic xfeatures).
|
||||
|
||||
There are separate permissions for native tasks and guests.
|
||||
|
||||
Qemu should request the guest permissions for dynamic xfeatures
|
||||
which will be exposed to the guest. This only needs to be done
|
||||
once before the first vcpu is created.
|
||||
|
||||
KVM implemented one new ARCH_GET_XCOMP_SUPP system attribute API to
|
||||
get host side supported_xcr0 and Qemu can decide if it can request
|
||||
dynamically enabled XSAVE features permission.
|
||||
https://lore.kernel.org/all/20220126152210.3044876-1-pbonzini@redhat.com/
|
||||
|
||||
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
|
||||
Signed-off-by: Jing Liu <jing2.liu@intel.com>
|
||||
Message-Id: <20220217060434.52460-4-yang.zhong@intel.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
|
||||
---
|
||||
target/i386/cpu.c | 7 +++++
|
||||
target/i386/cpu.h | 4 +++
|
||||
target/i386/kvm/kvm-cpu.c | 12 ++++----
|
||||
target/i386/kvm/kvm.c | 57 ++++++++++++++++++++++++++++++++++++++
|
||||
target/i386/kvm/kvm_i386.h | 1 +
|
||||
5 files changed, 75 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
|
||||
index 31d63be081..fb6b4c86de 100644
|
||||
--- a/target/i386/cpu.c
|
||||
+++ b/target/i386/cpu.c
|
||||
@@ -6048,6 +6048,7 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu)
|
||||
CPUX86State *env = &cpu->env;
|
||||
int i;
|
||||
uint64_t mask;
|
||||
+ static bool request_perm;
|
||||
|
||||
if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) {
|
||||
env->features[FEAT_XSAVE_COMP_LO] = 0;
|
||||
@@ -6063,6 +6064,12 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu)
|
||||
}
|
||||
}
|
||||
|
||||
+ /* Only request permission for first vcpu */
|
||||
+ if (kvm_enabled() && !request_perm) {
|
||||
+ kvm_request_xsave_components(cpu, mask);
|
||||
+ request_perm = true;
|
||||
+ }
|
||||
+
|
||||
env->features[FEAT_XSAVE_COMP_LO] = mask;
|
||||
env->features[FEAT_XSAVE_COMP_HI] = mask >> 32;
|
||||
}
|
||||
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
|
||||
index cc431b1d76..93d1c60ac1 100644
|
||||
--- a/target/i386/cpu.h
|
||||
+++ b/target/i386/cpu.h
|
||||
@@ -550,6 +550,10 @@ typedef enum X86Seg {
|
||||
#define XSTATE_ZMM_Hi256_MASK (1ULL << XSTATE_ZMM_Hi256_BIT)
|
||||
#define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT)
|
||||
#define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT)
|
||||
+#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT)
|
||||
+#define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT)
|
||||
+
|
||||
+#define XSTATE_DYNAMIC_MASK (XSTATE_XTILE_DATA_MASK)
|
||||
|
||||
#define ESA_FEATURE_ALIGN64_BIT 1
|
||||
|
||||
diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
|
||||
index ce27d3b1df..a35a1bf9fe 100644
|
||||
--- a/target/i386/kvm/kvm-cpu.c
|
||||
+++ b/target/i386/kvm/kvm-cpu.c
|
||||
@@ -84,7 +84,7 @@ static void kvm_cpu_max_instance_init(X86CPU *cpu)
|
||||
static void kvm_cpu_xsave_init(void)
|
||||
{
|
||||
static bool first = true;
|
||||
- KVMState *s = kvm_state;
|
||||
+ uint32_t eax, ebx, ecx, edx;
|
||||
int i;
|
||||
|
||||
if (!first) {
|
||||
@@ -100,11 +100,11 @@ static void kvm_cpu_xsave_init(void)
|
||||
ExtSaveArea *esa = &x86_ext_save_areas[i];
|
||||
|
||||
if (esa->size) {
|
||||
- int sz = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EAX);
|
||||
- if (sz != 0) {
|
||||
- assert(esa->size == sz);
|
||||
- esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX);
|
||||
- esa->ecx = kvm_arch_get_supported_cpuid(s, 0xd, i, R_ECX);
|
||||
+ host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx);
|
||||
+ if (eax != 0) {
|
||||
+ assert(esa->size == eax);
|
||||
+ esa->offset = ebx;
|
||||
+ esa->ecx = ecx;
|
||||
}
|
||||
}
|
||||
}
|
||||
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
|
||||
index 5a698bde19..e7f57d05a2 100644
|
||||
--- a/target/i386/kvm/kvm.c
|
||||
+++ b/target/i386/kvm/kvm.c
|
||||
@@ -17,6 +17,7 @@
|
||||
#include "qapi/error.h"
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/utsname.h>
|
||||
+#include <sys/syscall.h>
|
||||
|
||||
#include <linux/kvm.h>
|
||||
#include "standard-headers/asm-x86/kvm_para.h"
|
||||
@@ -347,6 +348,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
|
||||
struct kvm_cpuid2 *cpuid;
|
||||
uint32_t ret = 0;
|
||||
uint32_t cpuid_1_edx;
|
||||
+ uint64_t bitmask;
|
||||
|
||||
cpuid = get_supported_cpuid(s);
|
||||
|
||||
@@ -404,6 +406,25 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
|
||||
if (!has_msr_arch_capabs) {
|
||||
ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES;
|
||||
}
|
||||
+ } else if (function == 0xd && index == 0 &&
|
||||
+ (reg == R_EAX || reg == R_EDX)) {
|
||||
+ struct kvm_device_attr attr = {
|
||||
+ .group = 0,
|
||||
+ .attr = KVM_X86_XCOMP_GUEST_SUPP,
|
||||
+ .addr = (unsigned long) &bitmask
|
||||
+ };
|
||||
+
|
||||
+ bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES);
|
||||
+ if (!sys_attr) {
|
||||
+ warn_report("cannot get sys attribute capabilities %d", sys_attr);
|
||||
+ }
|
||||
+
|
||||
+ int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr);
|
||||
+ if (rc == -1 && (errno == ENXIO || errno == EINVAL)) {
|
||||
+ warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) "
|
||||
+ "error: %d", rc);
|
||||
+ }
|
||||
+ ret = (reg == R_EAX) ? bitmask : bitmask >> 32;
|
||||
} else if (function == 0x80000001 && reg == R_ECX) {
|
||||
/*
|
||||
* It's safe to enable TOPOEXT even if it's not returned by
|
||||
@@ -5050,3 +5071,39 @@ bool kvm_arch_cpu_check_are_resettable(void)
|
||||
{
|
||||
return !sev_es_enabled();
|
||||
}
|
||||
+
|
||||
+#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
|
||||
+
|
||||
+void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask)
|
||||
+{
|
||||
+ KVMState *s = kvm_state;
|
||||
+ uint64_t supported;
|
||||
+
|
||||
+ mask &= XSTATE_DYNAMIC_MASK;
|
||||
+ if (!mask) {
|
||||
+ return;
|
||||
+ }
|
||||
+ /*
|
||||
+ * Just ignore bits that are not in CPUID[EAX=0xD,ECX=0].
|
||||
+ * ARCH_REQ_XCOMP_GUEST_PERM would fail, and QEMU has warned
|
||||
+ * about them already because they are not supported features.
|
||||
+ */
|
||||
+ supported = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
|
||||
+ supported |= (uint64_t)kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX) << 32;
|
||||
+ mask &= supported;
|
||||
+
|
||||
+ while (mask) {
|
||||
+ int bit = ctz64(mask);
|
||||
+ int rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit);
|
||||
+ if (rc) {
|
||||
+ /*
|
||||
+ * Older kernel versions (<5.17) do not support
|
||||
+ * ARCH_REQ_XCOMP_GUEST_PERM, but also do not return
|
||||
+ * any dynamic feature from kvm_arch_get_supported_cpuid.
|
||||
+ */
|
||||
+ warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure "
|
||||
+ "for feature bit %d", bit);
|
||||
+ }
|
||||
+ mask &= ~BIT_ULL(bit);
|
||||
+ }
|
||||
+}
|
||||
diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
|
||||
index a978509d50..4124912c20 100644
|
||||
--- a/target/i386/kvm/kvm_i386.h
|
||||
+++ b/target/i386/kvm/kvm_i386.h
|
||||
@@ -52,5 +52,6 @@ bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp);
|
||||
uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address);
|
||||
|
||||
bool kvm_enable_sgx_provisioning(KVMState *s);
|
||||
+void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask);
|
||||
|
||||
#endif
|
||||
--
|
||||
2.27.0
|
||||
|
||||
182
x86-Support-XFD-and-AMX-xsave-data-migration.patch
Normal file
182
x86-Support-XFD-and-AMX-xsave-data-migration.patch
Normal file
@ -0,0 +1,182 @@
|
||||
From bb1b53e5d0b67d97042ea3c33b5c4c80e33809f2 Mon Sep 17 00:00:00 2001
|
||||
From: Zeng Guang <guang.zeng@intel.com>
|
||||
Date: Wed, 16 Feb 2022 22:04:33 -0800
|
||||
Subject: [PATCH 08/10] x86: Support XFD and AMX xsave data migration
|
||||
|
||||
from mainline-v7.0.0-rc0
|
||||
commit cdec2b753b487d9e8aab028231c35d87789ea083
|
||||
category: feature
|
||||
feature: SPR AMX support for Qemu
|
||||
bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB
|
||||
|
||||
Intel-SIG: commit cdec2b753b48 ("x86: Support XFD and AMX xsave data
|
||||
migration")
|
||||
|
||||
------------------------------------------------
|
||||
|
||||
x86: Support XFD and AMX xsave data migration
|
||||
|
||||
XFD (eXtended Feature Disable) allows enabling a
|
||||
feature on xsave state while preventing specific
|
||||
user threads from using the feature.
|
||||
|
||||
Support saving and restoring the XFD MSRs if CPUID.D.1.EAX[4]
|
||||
is enumerated as valid. Likewise, migrate the MSRs and
|
||||
related xsave state as necessary.
|
||||
|
||||
Signed-off-by: Zeng Guang <guang.zeng@intel.com>
|
||||
Signed-off-by: Wei Wang <wei.w.wang@intel.com>
|
||||
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
|
||||
Message-Id: <20220217060434.52460-8-yang.zhong@intel.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
|
||||
---
|
||||
target/i386/cpu.h | 9 +++++++++
|
||||
target/i386/kvm/kvm.c | 18 +++++++++++++++++
|
||||
target/i386/machine.c | 46 +++++++++++++++++++++++++++++++++++++++++++
|
||||
3 files changed, 73 insertions(+)
|
||||
|
||||
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
|
||||
index 74e66c352c..eaa99c302f 100644
|
||||
--- a/target/i386/cpu.h
|
||||
+++ b/target/i386/cpu.h
|
||||
@@ -506,6 +506,9 @@ typedef enum X86Seg {
|
||||
|
||||
#define MSR_VM_HSAVE_PA 0xc0010117
|
||||
|
||||
+#define MSR_IA32_XFD 0x000001c4
|
||||
+#define MSR_IA32_XFD_ERR 0x000001c5
|
||||
+
|
||||
#define MSR_IA32_BNDCFGS 0x00000d90
|
||||
#define MSR_IA32_XSS 0x00000da0
|
||||
#define MSR_IA32_UMWAIT_CONTROL 0xe1
|
||||
@@ -871,6 +874,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
|
||||
#define CPUID_7_1_EAX_AVX_VNNI (1U << 4)
|
||||
/* AVX512 BFloat16 Instruction */
|
||||
#define CPUID_7_1_EAX_AVX512_BF16 (1U << 5)
|
||||
+/* XFD (eXtended Feature Disable) */
|
||||
+#define CPUID_D_1_EAX_XFD (1U << 4)
|
||||
|
||||
/* Packets which contain IP payload have LIP values */
|
||||
#define CPUID_14_0_ECX_LIP (1U << 31)
|
||||
@@ -1612,6 +1617,10 @@ typedef struct CPUX86State {
|
||||
uint64_t msr_rtit_cr3_match;
|
||||
uint64_t msr_rtit_addrs[MAX_RTIT_ADDRS];
|
||||
|
||||
+ /* Per-VCPU XFD MSRs */
|
||||
+ uint64_t msr_xfd;
|
||||
+ uint64_t msr_xfd_err;
|
||||
+
|
||||
/* exception/interrupt handling */
|
||||
int error_code;
|
||||
int exception_is_int;
|
||||
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
|
||||
index b0b22dcf7c..49fca5ea88 100644
|
||||
--- a/target/i386/kvm/kvm.c
|
||||
+++ b/target/i386/kvm/kvm.c
|
||||
@@ -3219,6 +3219,13 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
|
||||
env->msr_ia32_sgxlepubkeyhash[3]);
|
||||
}
|
||||
|
||||
+ if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) {
|
||||
+ kvm_msr_entry_add(cpu, MSR_IA32_XFD,
|
||||
+ env->msr_xfd);
|
||||
+ kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR,
|
||||
+ env->msr_xfd_err);
|
||||
+ }
|
||||
+
|
||||
/* Note: MSR_IA32_FEATURE_CONTROL is written separately, see
|
||||
* kvm_put_msr_feature_control. */
|
||||
}
|
||||
@@ -3570,6 +3577,11 @@ static int kvm_get_msrs(X86CPU *cpu)
|
||||
kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH3, 0);
|
||||
}
|
||||
|
||||
+ if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) {
|
||||
+ kvm_msr_entry_add(cpu, MSR_IA32_XFD, 0);
|
||||
+ kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, 0);
|
||||
+ }
|
||||
+
|
||||
ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
@@ -3866,6 +3878,12 @@ static int kvm_get_msrs(X86CPU *cpu)
|
||||
env->msr_ia32_sgxlepubkeyhash[index - MSR_IA32_SGXLEPUBKEYHASH0] =
|
||||
msrs[i].data;
|
||||
break;
|
||||
+ case MSR_IA32_XFD:
|
||||
+ env->msr_xfd = msrs[i].data;
|
||||
+ break;
|
||||
+ case MSR_IA32_XFD_ERR:
|
||||
+ env->msr_xfd_err = msrs[i].data;
|
||||
+ break;
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/target/i386/machine.c b/target/i386/machine.c
|
||||
index 83c2b91529..3977e9d8f8 100644
|
||||
--- a/target/i386/machine.c
|
||||
+++ b/target/i386/machine.c
|
||||
@@ -1455,6 +1455,48 @@ static const VMStateDescription vmstate_msr_intel_sgx = {
|
||||
}
|
||||
};
|
||||
|
||||
+static bool xfd_msrs_needed(void *opaque)
|
||||
+{
|
||||
+ X86CPU *cpu = opaque;
|
||||
+ CPUX86State *env = &cpu->env;
|
||||
+
|
||||
+ return !!(env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD);
|
||||
+}
|
||||
+
|
||||
+static const VMStateDescription vmstate_msr_xfd = {
|
||||
+ .name = "cpu/msr_xfd",
|
||||
+ .version_id = 1,
|
||||
+ .minimum_version_id = 1,
|
||||
+ .needed = xfd_msrs_needed,
|
||||
+ .fields = (VMStateField[]) {
|
||||
+ VMSTATE_UINT64(env.msr_xfd, X86CPU),
|
||||
+ VMSTATE_UINT64(env.msr_xfd_err, X86CPU),
|
||||
+ VMSTATE_END_OF_LIST()
|
||||
+ }
|
||||
+};
|
||||
+
|
||||
+#ifdef TARGET_X86_64
|
||||
+static bool amx_xtile_needed(void *opaque)
|
||||
+{
|
||||
+ X86CPU *cpu = opaque;
|
||||
+ CPUX86State *env = &cpu->env;
|
||||
+
|
||||
+ return !!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE);
|
||||
+}
|
||||
+
|
||||
+static const VMStateDescription vmstate_amx_xtile = {
|
||||
+ .name = "cpu/intel_amx_xtile",
|
||||
+ .version_id = 1,
|
||||
+ .minimum_version_id = 1,
|
||||
+ .needed = amx_xtile_needed,
|
||||
+ .fields = (VMStateField[]) {
|
||||
+ VMSTATE_UINT8_ARRAY(env.xtilecfg, X86CPU, 64),
|
||||
+ VMSTATE_UINT8_ARRAY(env.xtiledata, X86CPU, 8192),
|
||||
+ VMSTATE_END_OF_LIST()
|
||||
+ }
|
||||
+};
|
||||
+#endif
|
||||
+
|
||||
const VMStateDescription vmstate_x86_cpu = {
|
||||
.name = "cpu",
|
||||
.version_id = 12,
|
||||
@@ -1593,6 +1635,10 @@ const VMStateDescription vmstate_x86_cpu = {
|
||||
#endif
|
||||
&vmstate_msr_tsx_ctrl,
|
||||
&vmstate_msr_intel_sgx,
|
||||
+ &vmstate_msr_xfd,
|
||||
+#ifdef TARGET_X86_64
|
||||
+ &vmstate_amx_xtile,
|
||||
+#endif
|
||||
NULL
|
||||
}
|
||||
};
|
||||
--
|
||||
2.27.0
|
||||
|
||||
186
x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch
Normal file
186
x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch
Normal file
@ -0,0 +1,186 @@
|
||||
From e98958c23ea5b15a8e84642c373336a8898cd63f Mon Sep 17 00:00:00 2001
|
||||
From: Jing Liu <jing2.liu@intel.com>
|
||||
Date: Wed, 16 Feb 2022 22:04:32 -0800
|
||||
Subject: [PATCH 07/10] x86: add support for KVM_CAP_XSAVE2 and AMX state
|
||||
migration
|
||||
|
||||
from mainline-v7.0.0-rc0
|
||||
commit e56dd3c70abb31893c61ac834109fa7a38841330
|
||||
category: feature
|
||||
feature: SPR AMX support for Qemu
|
||||
bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I5VHOB
|
||||
|
||||
Intel-SIG: commit e56dd3c70abb ("x86: add support for KVM_CAP_XSAVE2 and
|
||||
AMX state migration")
|
||||
|
||||
-------------------------------------------------------
|
||||
|
||||
x86: add support for KVM_CAP_XSAVE2 and AMX state migration
|
||||
|
||||
When dynamic xfeatures (e.g. AMX) are used by the guest, the xsave
|
||||
area would be larger than 4KB. KVM_GET_XSAVE2 and KVM_SET_XSAVE
|
||||
under KVM_CAP_XSAVE2 work with an xsave buffer larger than 4KB.
|
||||
Always use the new ioctls under KVM_CAP_XSAVE2 when KVM supports it.
|
||||
|
||||
Signed-off-by: Jing Liu <jing2.liu@intel.com>
|
||||
Signed-off-by: Zeng Guang <guang.zeng@intel.com>
|
||||
Signed-off-by: Wei Wang <wei.w.wang@intel.com>
|
||||
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
|
||||
Message-Id: <20220217060434.52460-7-yang.zhong@intel.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
|
||||
---
|
||||
target/i386/cpu.h | 4 ++++
|
||||
target/i386/kvm/kvm.c | 42 ++++++++++++++++++++++++--------------
|
||||
target/i386/xsave_helper.c | 28 +++++++++++++++++++++++++
|
||||
3 files changed, 59 insertions(+), 15 deletions(-)
|
||||
|
||||
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
|
||||
index 09c725ee13..74e66c352c 100644
|
||||
--- a/target/i386/cpu.h
|
||||
+++ b/target/i386/cpu.h
|
||||
@@ -1523,6 +1523,10 @@ typedef struct CPUX86State {
|
||||
uint64_t opmask_regs[NB_OPMASK_REGS];
|
||||
YMMReg zmmh_regs[CPU_NB_REGS];
|
||||
ZMMReg hi16_zmm_regs[CPU_NB_REGS];
|
||||
+#ifdef TARGET_X86_64
|
||||
+ uint8_t xtilecfg[64];
|
||||
+ uint8_t xtiledata[8192];
|
||||
+#endif
|
||||
|
||||
/* sysenter registers */
|
||||
uint32_t sysenter_cs;
|
||||
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
|
||||
index 60ccdec5e8..b0b22dcf7c 100644
|
||||
--- a/target/i386/kvm/kvm.c
|
||||
+++ b/target/i386/kvm/kvm.c
|
||||
@@ -123,6 +123,7 @@ static uint32_t num_architectural_pmu_gp_counters;
|
||||
static uint32_t num_architectural_pmu_fixed_counters;
|
||||
|
||||
static int has_xsave;
|
||||
+static int has_xsave2;
|
||||
static int has_xcrs;
|
||||
static int has_pit_state2;
|
||||
static int has_exception_payload;
|
||||
@@ -1585,6 +1586,26 @@ static Error *invtsc_mig_blocker;
|
||||
|
||||
#define KVM_MAX_CPUID_ENTRIES 100
|
||||
|
||||
+static void kvm_init_xsave(CPUX86State *env)
|
||||
+{
|
||||
+ if (has_xsave2) {
|
||||
+ env->xsave_buf_len = QEMU_ALIGN_UP(has_xsave2, 4096);
|
||||
+ } else if (has_xsave) {
|
||||
+ env->xsave_buf_len = sizeof(struct kvm_xsave);
|
||||
+ } else {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len);
|
||||
+ memset(env->xsave_buf, 0, env->xsave_buf_len);
|
||||
+ /*
|
||||
+ * The allocated storage must be large enough for all of the
|
||||
+ * possible XSAVE state components.
|
||||
+ */
|
||||
+ assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) <=
|
||||
+ env->xsave_buf_len);
|
||||
+}
|
||||
+
|
||||
int kvm_arch_init_vcpu(CPUState *cs)
|
||||
{
|
||||
struct {
|
||||
@@ -1614,6 +1635,8 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
|
||||
cpuid_i = 0;
|
||||
|
||||
+ has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2);
|
||||
+
|
||||
r = kvm_arch_set_tsc_khz(cs);
|
||||
if (r < 0) {
|
||||
return r;
|
||||
@@ -2003,19 +2026,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
if (r) {
|
||||
goto fail;
|
||||
}
|
||||
-
|
||||
- if (has_xsave) {
|
||||
- env->xsave_buf_len = sizeof(struct kvm_xsave);
|
||||
- env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len);
|
||||
- memset(env->xsave_buf, 0, env->xsave_buf_len);
|
||||
-
|
||||
- /*
|
||||
- * The allocated storage must be large enough for all of the
|
||||
- * possible XSAVE state components.
|
||||
- */
|
||||
- assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX)
|
||||
- <= env->xsave_buf_len);
|
||||
- }
|
||||
+ kvm_init_xsave(env);
|
||||
|
||||
max_nested_state_len = kvm_max_nested_state_length();
|
||||
if (max_nested_state_len > 0) {
|
||||
@@ -3263,13 +3274,14 @@ static int kvm_get_xsave(X86CPU *cpu)
|
||||
{
|
||||
CPUX86State *env = &cpu->env;
|
||||
void *xsave = env->xsave_buf;
|
||||
- int ret;
|
||||
+ int type, ret;
|
||||
|
||||
if (!has_xsave) {
|
||||
return kvm_get_fpu(cpu);
|
||||
}
|
||||
|
||||
- ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave);
|
||||
+ type = has_xsave2 ? KVM_GET_XSAVE2 : KVM_GET_XSAVE;
|
||||
+ ret = kvm_vcpu_ioctl(CPU(cpu), type, xsave);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
}
|
||||
diff --git a/target/i386/xsave_helper.c b/target/i386/xsave_helper.c
|
||||
index ac61a96344..996e9f3bfe 100644
|
||||
--- a/target/i386/xsave_helper.c
|
||||
+++ b/target/i386/xsave_helper.c
|
||||
@@ -126,6 +126,20 @@ void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen)
|
||||
|
||||
memcpy(pkru, &env->pkru, sizeof(env->pkru));
|
||||
}
|
||||
+
|
||||
+ e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT];
|
||||
+ if (e->size && e->offset) {
|
||||
+ XSaveXTILECFG *tilecfg = buf + e->offset;
|
||||
+
|
||||
+ memcpy(tilecfg, &env->xtilecfg, sizeof(env->xtilecfg));
|
||||
+ }
|
||||
+
|
||||
+ e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT];
|
||||
+ if (e->size && e->offset && buflen >= e->size + e->offset) {
|
||||
+ XSaveXTILEDATA *tiledata = buf + e->offset;
|
||||
+
|
||||
+ memcpy(tiledata, &env->xtiledata, sizeof(env->xtiledata));
|
||||
+ }
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -247,5 +261,19 @@ void x86_cpu_xrstor_all_areas(X86CPU *cpu, const void *buf, uint32_t buflen)
|
||||
pkru = buf + e->offset;
|
||||
memcpy(&env->pkru, pkru, sizeof(env->pkru));
|
||||
}
|
||||
+
|
||||
+ e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT];
|
||||
+ if (e->size && e->offset) {
|
||||
+ const XSaveXTILECFG *tilecfg = buf + e->offset;
|
||||
+
|
||||
+ memcpy(&env->xtilecfg, tilecfg, sizeof(env->xtilecfg));
|
||||
+ }
|
||||
+
|
||||
+ e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT];
|
||||
+ if (e->size && e->offset && buflen >= e->size + e->offset) {
|
||||
+ const XSaveXTILEDATA *tiledata = buf + e->offset;
|
||||
+
|
||||
+ memcpy(&env->xtiledata, tiledata, sizeof(env->xtiledata));
|
||||
+ }
|
||||
#endif
|
||||
}
|
||||
--
|
||||
2.27.0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user