271 lines
10 KiB
Diff
271 lines
10 KiB
Diff
|
|
From 06d1ed3c9e3b736944e5267ffc8d341801fb758b Mon Sep 17 00:00:00 2001
|
||
|
|
From: Chenyi Qiang <chenyi.qiang@intel.com>
|
||
|
|
Date: Thu, 29 Sep 2022 15:20:14 +0800
|
||
|
|
Subject: [PATCH] i386: add notify VM exit support
|
||
|
|
|
||
|
|
from mainline-v7.2.0-rc0
|
||
|
|
commit e2e69f6bb907a70ac518230c54e98e7abcb0c911
|
||
|
|
category: feature
|
||
|
|
feature: Notify VM Exit
|
||
|
|
bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I6GWQE
|
||
|
|
|
||
|
|
Intel-SIG: commit e2e69f6bb907 ("i386: add notify VM exit support")
|
||
|
|
|
||
|
|
------------------------------------------------------------------
|
||
|
|
|
||
|
|
i386: add notify VM exit support
|
||
|
|
|
||
|
|
There are cases where a malicious virtual machine can cause the CPU to get stuck (due
|
||
|
|
to event windows not opening up), e.g., an infinite loop in microcode when
|
||
|
|
nested #AC (CVE-2015-5307). No event window means no event (NMI, SMI and
|
||
|
|
IRQ) can be delivered. This makes the CPU unavailable to the host or
|
||
|
|
other VMs. Notify VM exit is introduced to mitigate this kind of
|
||
|
|
attacks, which will generate a VM exit if no event window occurs in VM
|
||
|
|
non-root mode for a specified amount of time (notify window).
|
||
|
|
|
||
|
|
A new KVM capability KVM_CAP_X86_NOTIFY_VMEXIT is exposed to user space
|
||
|
|
so that the user can query the capability and set the expected notify
|
||
|
|
window when creating VMs. The format of the argument when enabling this
|
||
|
|
capability is as follows:
|
||
|
|
Bit 63:32 - notify window specified in qemu command
|
||
|
|
Bit 31:0 - some flags (e.g. KVM_X86_NOTIFY_VMEXIT_ENABLED is set to
|
||
|
|
enable the feature.)
|
||
|
|
|
||
|
|
Users can configure the feature by a new (x86 only) accel property:
|
||
|
|
qemu -accel kvm,notify-vmexit=run|internal-error|disable,notify-window=n
|
||
|
|
|
||
|
|
The default option of notify-vmexit is run, which will enable the
|
||
|
|
capability and do nothing if the exit happens. The internal-error option
|
||
|
|
raises a KVM internal error if it happens. The disable option does not
|
||
|
|
enable the capability. The default value of notify-window is 0. It is valid
|
||
|
|
only when notify-vmexit is not disabled. The valid range of notify-window
|
||
|
|
is non-negative. It is even safe to set it to zero since there's an
|
||
|
|
internal hardware threshold to be added to ensure no false positive.
|
||
|
|
|
||
|
|
A notify VM exit may happen with VM_CONTEXT_INVALID set in exit
|
||
|
|
qualification (no cases are anticipated that would set this bit), which
|
||
|
|
means VM context is corrupted. It would be reflected in the flags of
|
||
|
|
KVM_EXIT_NOTIFY exit. If KVM_NOTIFY_CONTEXT_INVALID bit is set, raise a KVM
|
||
|
|
internal error unconditionally.
|
||
|
|
|
||
|
|
Acked-by: Peter Xu <peterx@redhat.com>
|
||
|
|
Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
|
||
|
|
Message-Id: <20220929072014.20705-5-chenyi.qiang@intel.com>
|
||
|
|
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||
|
|
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
|
||
|
|
---
|
||
|
|
accel/kvm/kvm-all.c | 2 +
|
||
|
|
qapi/run-state.json | 17 ++++++++
|
||
|
|
qemu-options.hx | 11 +++++
|
||
|
|
target/i386/kvm/kvm.c | 98 +++++++++++++++++++++++++++++++++++++++++++
|
||
|
|
4 files changed, 128 insertions(+)
|
||
|
|
|
||
|
|
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||
|
|
index 91d93facf2..799d993f6c 100644
|
||
|
|
--- a/accel/kvm/kvm-all.c
|
||
|
|
+++ b/accel/kvm/kvm-all.c
|
||
|
|
@@ -3602,6 +3602,8 @@ static void kvm_accel_instance_init(Object *obj)
|
||
|
|
s->kernel_irqchip_split = ON_OFF_AUTO_AUTO;
|
||
|
|
/* KVM dirty ring is by default off */
|
||
|
|
s->kvm_dirty_ring_size = 0;
|
||
|
|
+ s->notify_vmexit = NOTIFY_VMEXIT_OPTION_RUN;
|
||
|
|
+ s->notify_window = 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
static void kvm_accel_class_init(ObjectClass *oc, void *data)
|
||
|
|
diff --git a/qapi/run-state.json b/qapi/run-state.json
|
||
|
|
index 43d66d700f..08c38b2c67 100644
|
||
|
|
--- a/qapi/run-state.json
|
||
|
|
+++ b/qapi/run-state.json
|
||
|
|
@@ -638,3 +638,20 @@
|
||
|
|
{ 'struct': 'MemoryFailureFlags',
|
||
|
|
'data': { 'action-required': 'bool',
|
||
|
|
'recursive': 'bool'} }
|
||
|
|
+
|
||
|
|
+##
|
||
|
|
+# @NotifyVmexitOption:
|
||
|
|
+#
|
||
|
|
+# An enumeration of the options specified when enabling notify VM exit
|
||
|
|
+#
|
||
|
|
+# @run: enable the feature, do nothing and continue if the notify VM exit happens.
|
||
|
|
+#
|
||
|
|
+# @internal-error: enable the feature, raise an internal error if the notify
|
||
|
|
+# VM exit happens.
|
||
|
|
+#
|
||
|
|
+# @disable: disable the feature.
|
||
|
|
+#
|
||
|
|
+# Since: 7.2
|
||
|
|
+##
|
||
|
|
+{ 'enum': 'NotifyVmexitOption',
|
||
|
|
+ 'data': [ 'run', 'internal-error', 'disable' ] }
|
||
|
|
\ No newline at end of file
|
||
|
|
diff --git a/qemu-options.hx b/qemu-options.hx
|
||
|
|
index 047d28a357..3c9b0f022c 100644
|
||
|
|
--- a/qemu-options.hx
|
||
|
|
+++ b/qemu-options.hx
|
||
|
|
@@ -152,6 +152,7 @@ DEF("accel", HAS_ARG, QEMU_OPTION_accel,
|
||
|
|
" split-wx=on|off (enable TCG split w^x mapping)\n"
|
||
|
|
" tb-size=n (TCG translation block cache size)\n"
|
||
|
|
" dirty-ring-size=n (KVM dirty ring GFN count, default 0)\n"
|
||
|
|
+ " notify-vmexit=run|internal-error|disable,notify-window=n (enable notify VM exit and set notify window, x86 only)\n"
|
||
|
|
" thread=single|multi (enable multi-threaded TCG)\n", QEMU_ARCH_ALL)
|
||
|
|
SRST
|
||
|
|
``-accel name[,prop=value[,...]]``
|
||
|
|
@@ -203,6 +204,16 @@ SRST
|
||
|
|
is disabled (dirty-ring-size=0). When enabled, KVM will instead
|
||
|
|
record dirty pages in a bitmap.
|
||
|
|
|
||
|
|
+ ``notify-vmexit=run|internal-error|disable,notify-window=n``
|
||
|
|
+ Enables or disables notify VM exit support on x86 host and specifies
|
||
|
|
+ the corresponding notify window to trigger the VM exit if enabled.
|
||
|
|
+ ``run`` option enables the feature. It does nothing and continues
|
||
|
|
+ if the exit happens. ``internal-error`` option enables the feature.
|
||
|
|
+ It raises an internal error. ``disable`` option doesn't enable the feature.
|
||
|
|
+ This feature can mitigate the CPU stuck issue caused by event windows that do not
|
||
|
|
+ open up for a specified amount of time (i.e. notify-window).
|
||
|
|
+ Default: notify-vmexit=run,notify-window=0.
|
||
|
|
+
|
||
|
|
ERST
|
||
|
|
|
||
|
|
DEF("smp", HAS_ARG, QEMU_OPTION_smp,
|
||
|
|
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
|
||
|
|
index e2f28ce958..b8257e7e5f 100644
|
||
|
|
--- a/target/i386/kvm/kvm.c
|
||
|
|
+++ b/target/i386/kvm/kvm.c
|
||
|
|
@@ -15,6 +15,7 @@
|
||
|
|
#include "qemu/osdep.h"
|
||
|
|
#include "qapi/qapi-events-run-state.h"
|
||
|
|
#include "qapi/error.h"
|
||
|
|
+#include "qapi/visitor.h"
|
||
|
|
#include <sys/ioctl.h>
|
||
|
|
#include <sys/utsname.h>
|
||
|
|
#include <sys/syscall.h>
|
||
|
|
@@ -2496,6 +2497,21 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
+ if (s->notify_vmexit != NOTIFY_VMEXIT_OPTION_DISABLE &&
|
||
|
|
+ kvm_check_extension(s, KVM_CAP_X86_NOTIFY_VMEXIT)) {
|
||
|
|
+ uint64_t notify_window_flags =
|
||
|
|
+ ((uint64_t)s->notify_window << 32) |
|
||
|
|
+ KVM_X86_NOTIFY_VMEXIT_ENABLED |
|
||
|
|
+ KVM_X86_NOTIFY_VMEXIT_USER;
|
||
|
|
+ ret = kvm_vm_enable_cap(s, KVM_CAP_X86_NOTIFY_VMEXIT, 0,
|
||
|
|
+ notify_window_flags);
|
||
|
|
+ if (ret < 0) {
|
||
|
|
+ error_report("kvm: Failed to enable notify vmexit cap: %s",
|
||
|
|
+ strerror(-ret));
|
||
|
|
+ return ret;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
@@ -4839,6 +4855,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
|
||
|
|
X86CPU *cpu = X86_CPU(cs);
|
||
|
|
uint64_t code;
|
||
|
|
int ret;
|
||
|
|
+ bool ctx_invalid;
|
||
|
|
+ char str[256];
|
||
|
|
+ KVMState *state;
|
||
|
|
|
||
|
|
switch (run->exit_reason) {
|
||
|
|
case KVM_EXIT_HLT:
|
||
|
|
@@ -4894,6 +4913,21 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
|
||
|
|
/* already handled in kvm_arch_post_run */
|
||
|
|
ret = 0;
|
||
|
|
break;
|
||
|
|
+ case KVM_EXIT_NOTIFY:
|
||
|
|
+ ctx_invalid = !!(run->notify.flags & KVM_NOTIFY_CONTEXT_INVALID);
|
||
|
|
+ state = KVM_STATE(current_accel());
|
||
|
|
+ sprintf(str, "Encounter a notify exit with %svalid context in"
|
||
|
|
+ " guest. There can be possible misbehaves in guest."
|
||
|
|
+ " Please have a look.", ctx_invalid ? "in" : "");
|
||
|
|
+ if (ctx_invalid ||
|
||
|
|
+ state->notify_vmexit == NOTIFY_VMEXIT_OPTION_INTERNAL_ERROR) {
|
||
|
|
+ warn_report("KVM internal error: %s", str);
|
||
|
|
+ ret = -1;
|
||
|
|
+ } else {
|
||
|
|
+ warn_report_once("KVM: %s", str);
|
||
|
|
+ ret = 0;
|
||
|
|
+ }
|
||
|
|
+ break;
|
||
|
|
default:
|
||
|
|
fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
|
||
|
|
ret = -1;
|
||
|
|
@@ -5169,6 +5203,70 @@ void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
+static int kvm_arch_get_notify_vmexit(Object *obj, Error **errp)
|
||
|
|
+{
|
||
|
|
+ KVMState *s = KVM_STATE(obj);
|
||
|
|
+ return s->notify_vmexit;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+static void kvm_arch_set_notify_vmexit(Object *obj, int value, Error **errp)
|
||
|
|
+{
|
||
|
|
+ KVMState *s = KVM_STATE(obj);
|
||
|
|
+
|
||
|
|
+ if (s->fd != -1) {
|
||
|
|
+ error_setg(errp, "Cannot set properties after the accelerator has been initialized");
|
||
|
|
+ return;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ s->notify_vmexit = value;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+static void kvm_arch_get_notify_window(Object *obj, Visitor *v,
|
||
|
|
+ const char *name, void *opaque,
|
||
|
|
+ Error **errp)
|
||
|
|
+{
|
||
|
|
+ KVMState *s = KVM_STATE(obj);
|
||
|
|
+ uint32_t value = s->notify_window;
|
||
|
|
+
|
||
|
|
+ visit_type_uint32(v, name, &value, errp);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+static void kvm_arch_set_notify_window(Object *obj, Visitor *v,
|
||
|
|
+ const char *name, void *opaque,
|
||
|
|
+ Error **errp)
|
||
|
|
+{
|
||
|
|
+ KVMState *s = KVM_STATE(obj);
|
||
|
|
+ Error *error = NULL;
|
||
|
|
+ uint32_t value;
|
||
|
|
+
|
||
|
|
+ if (s->fd != -1) {
|
||
|
|
+ error_setg(errp, "Cannot set properties after the accelerator has been initialized");
|
||
|
|
+ return;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ visit_type_uint32(v, name, &value, &error);
|
||
|
|
+ if (error) {
|
||
|
|
+ error_propagate(errp, error);
|
||
|
|
+ return;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ s->notify_window = value;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
void kvm_arch_accel_class_init(ObjectClass *oc)
|
||
|
|
{
|
||
|
|
+ object_class_property_add_enum(oc, "notify-vmexit", "NotifyVMexitOption",
|
||
|
|
+ &NotifyVmexitOption_lookup,
|
||
|
|
+ kvm_arch_get_notify_vmexit,
|
||
|
|
+ kvm_arch_set_notify_vmexit);
|
||
|
|
+ object_class_property_set_description(oc, "notify-vmexit",
|
||
|
|
+ "Enable notify VM exit");
|
||
|
|
+
|
||
|
|
+ object_class_property_add(oc, "notify-window", "uint32",
|
||
|
|
+ kvm_arch_get_notify_window,
|
||
|
|
+ kvm_arch_set_notify_window,
|
||
|
|
+ NULL, NULL);
|
||
|
|
+ object_class_property_set_description(oc, "notify-window",
|
||
|
|
+ "Clock cycles without an event window "
|
||
|
|
+ "after which a notification VM exit occurs");
|
||
|
|
}
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|