- vdpa-dev: Fix initialisation order to restore VDUSE compatibility - tcg: Allow top bit of SIMD_DATA_BITS to be set in simd_desc() - migration: fix-possible-int-overflow - target/m68k: Map FPU exceptions to FPSR register - qemu-options: Fix CXL Fixed Memory Window interleave-granularity typo - hvf: arm: Fix encodings for ID_AA64PFR1_EL1 and debug System registers - hw/intc/arm_gic: Fix handling of NS view of GICC_APR<n> - qio: Inherit follow_coroutine_ctx across TLS - target/riscv: Fix the element agnostic function problem - accel/tcg: Fix typo causing tb->page_addr[1] to not be recorded - tcg/loongarch64: Fix tcg_out_movi vs some pcrel pointers - migration: Fix file migration with fdset - ui/vnc: don't return an empty SASL mechlist to the client - target/arm: Fix FJCVTZS vs flush-to-zero - hw/ppc/e500: Prefer QOM cast - sphinx/qapidoc: Fix to generate doc for explicit, unboxed arguments - hw/ppc/e500: Remove unused "irqs" parameter - hw/ppc/e500: Add missing device tree properties to i2c controller node - hw/i386/amd_iommu: Don't leak memory in amdvi_update_iotlb() - hw/arm/mps2-tz.c: fix RX/TX interrupts order - target/i386: csv: Add support to migrate the incoming context for CSV3 guest - target/i386: csv: Add support to migrate the outgoing context for CSV3 guest - target/i386: csv: Add support to migrate the incoming page for CSV3 guest - target/i386: csv: Add support to migrate the outgoing page for CSV3 guest - linux-headers: update kernel headers to include CSV3 migration cmds - vfio: Only map shared region for CSV3 virtual machine - vga: Force full update for CSV3 guest - target/i386: csv: Load initial image to private memory for CSV3 guest - target/i386: csv: Do not register/unregister guest secure memory for CSV3 guest - target/i386: cpu: Populate CPUID 0x8000_001F when CSV3 is active - target/i386: csv: Add command to load vmcb to CSV3 guest memory - target/i386: csv: Add command to load data to CSV3 guest memory - target/i386: csv: Add command to 
initialize CSV3 context - target/i386: csv: Add CSV3 context - next-kbd: convert to use qemu_input_handler_register() - qemu/bswap: Undefine CPU_CONVERT() once done - exec/memop: Remove unused memop_big_endian() helper - hw/nvme: fix handling of over-committed queues - 9pfs: fix crash on 'Treaddir' request - hw/misc/psp: Pin the hugepage memory specified by mem2 during use for psp - hw/misc: support tkm use mem2 memory - hw/i386: add mem2 option for qemu - kvm: add support for guest physical bits - target/i386: add guest-phys-bits cpu property Signed-off-by: Jiabo Feng <fengjiabo1@huawei.com> (cherry picked from commit f45f35e88509a4ffa9f62332ee9601e9fe1f8d09)
325 lines
10 KiB
Diff
325 lines
10 KiB
Diff
From d29bc8738131dcaaa1a1ae2870ea29b59a137f30 Mon Sep 17 00:00:00 2001
|
|
From: xiongmengbiao <xiongmengbiao@hygon.cn>
|
|
Date: Wed, 29 May 2024 00:05:44 +0800
|
|
Subject: [PATCH] hw/i386: add mem2 option for qemu
|
|
|
|
The '-mem2' option is used to create a set of hugepages
|
|
of memory and map them to a fixed address range of the guest.
|
|
|
|
This allows some devices to easily obtain contiguous host
|
|
physical address ranges for performing DMA operations.
|
|
|
|
Signed-off-by: xiongmengbiao <xiongmengbiao@hygon.cn>
|
|
---
|
|
hw/i386/pc.c | 121 ++++++++++++++++++++++++++++++++++++++++++++
|
|
include/hw/boards.h | 2 +
|
|
qemu-options.hx | 12 +++++
|
|
system/vl.c | 76 ++++++++++++++++++++++++++++
|
|
4 files changed, 211 insertions(+)
|
|
|
|
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
|
|
index 29b9964733..204e34db86 100644
|
|
--- a/hw/i386/pc.c
|
|
+++ b/hw/i386/pc.c
|
|
@@ -743,6 +743,111 @@ void xen_load_linux(PCMachineState *pcms)
|
|
x86ms->fw_cfg = fw_cfg;
|
|
}
|
|
|
|
+/*
+ * Read a decimal counter from an already-open sysfs hugepage file.
+ *
+ * @fd: descriptor to read from
+ * @value: receives the parsed counter on success
+ *
+ * Returns 0 on success, -1 if nothing could be read or the content is
+ * not a non-negative decimal number.
+ */
+static int read_hugepage_counter(int fd, long long *value)
+{
+    char buf[32] = {0};
+    char *end = NULL;
+    ssize_t len;
+
+    /* Keep one byte spare so the text is always NUL-terminated. */
+    len = read(fd, buf, sizeof(buf) - 1);
+    if (len <= 0) {
+        return -1;
+    }
+    buf[len] = '\0';
+
+    errno = 0;
+    *value = strtoll(buf, &end, 10);
+    if (end == buf || errno == ERANGE || *value < 0) {
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Make sure the host has at least @page_num free 2 MiB hugepages.
+ *
+ * Reads nr_hugepages and free_hugepages from sysfs. If fewer than
+ * @page_num pages are free, nr_hugepages is raised by the shortfall.
+ *
+ * NOTE(review): a successful write is not checked against the resulting
+ * number of free pages; a remaining shortfall is only detected later,
+ * when the caller's mmap() fails.
+ *
+ * Returns 0 on success, -1 on any open/read/parse/write failure.
+ */
+static int try_create_2MB_page(uint32_t page_num)
+{
+    const char *nr_path =
+        "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages";
+    const char *free_path =
+        "/sys/kernel/mm/hugepages/hugepages-2048kB/free_hugepages";
+    char new_total[32];
+    long long nr_hp_num = 0, free_hp_num = 0;
+    int nr_fd, free_fd = -1, len, ret = -1;
+
+    /*
+     * Open and check one file at a time so that errno still belongs to
+     * the call that actually failed when it is reported.
+     */
+    nr_fd = qemu_open_old(nr_path, O_RDWR);
+    if (nr_fd < 0) {
+        error_report("%s: open %s failed: %s", __func__, nr_path,
+                     strerror(errno));
+        goto end;
+    }
+
+    free_fd = qemu_open_old(free_path, O_RDONLY);
+    if (free_fd < 0) {
+        error_report("%s: open %s failed: %s", __func__, free_path,
+                     strerror(errno));
+        goto end;
+    }
+
+    if (read_hugepage_counter(nr_fd, &nr_hp_num) < 0) {
+        error_report("%s: cannot read %s", __func__, nr_path);
+        goto end;
+    }
+    if (read_hugepage_counter(free_fd, &free_hp_num) < 0) {
+        error_report("%s: cannot read %s", __func__, free_path);
+        goto end;
+    }
+
+    /* Enough free pages already: nothing to change. */
+    if ((long long)page_num <= free_hp_num) {
+        ret = 0;
+        goto end;
+    }
+
+    /* Grow the pool by exactly the number of pages that are missing. */
+    nr_hp_num += (long long)page_num - free_hp_num;
+    len = snprintf(new_total, sizeof(new_total), "%lld", nr_hp_num);
+    if (write(nr_fd, new_total, len) != len) {
+        error_report("%s: write %s failed: %s", __func__, nr_path,
+                     strerror(errno));
+        goto end;
+    }
+
+    ret = 0;
+end:
+    if (nr_fd >= 0) {
+        close(nr_fd);
+    }
+    if (free_fd >= 0) {
+        close(free_fd);
+    }
+    return ret;
+}
|
|
+
|
|
+#define HUGEPAGE_NUM_MAX 128
|
|
+#define HUGEPAGE_SIZE (1024*1024*2)
|
|
+/*
+ * Back the guest physical range starting at ms->ram2_base with 2 MiB host
+ * hugepages and add it to @system_memory, one memory region per page.
+ *
+ * @ms: machine state providing ram2_base and ram2_size
+ * @system_memory: container the per-page regions are added to
+ *
+ * At most HUGEPAGE_NUM_MAX pages are supported. Any failure terminates
+ * the process with EXIT_FAILURE; the function only returns on success.
+ */
+static void mem2_init(MachineState *ms, MemoryRegion *system_memory)
+{
+    const char *lock_file =
+        "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_overcommit_hugepages";
+    /* Check the limit on the full-width value before narrowing it. */
+    uint64_t requested_pages = ms->ram2_size / HUGEPAGE_SIZE;
+    uint32_t page_num, i;
+    int lock_fd, ret = -1;
+
+    if (requested_pages > HUGEPAGE_NUM_MAX) {
+        error_report("\"-mem2 'size='\" must not exceed %dM",
+                     (HUGEPAGE_SIZE * HUGEPAGE_NUM_MAX) / (1024 * 1024));
+        exit(EXIT_FAILURE);
+    }
+    page_num = requested_pages;
+
+    /*
+     * Reserving hugepages from the OS and then mapping them has to be
+     * synchronized; a lock on this sysfs file is used for that.
+     */
+    lock_fd = qemu_open_old(lock_file, O_WRONLY);
+    if (lock_fd < 0) {
+        error_report("%s: open %s failed: %s", __func__, lock_file,
+                     strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+
+    /* Retry while the lock is merely contended; anything else is fatal. */
+    while (qemu_lock_fd(lock_fd, 0, 0, true)) {
+        if (errno != EACCES && errno != EAGAIN) {
+            error_report("qemu_lock_fd failed: %s", strerror(errno));
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    /* Grow the hugepage pool only if there are not enough free pages. */
+    if (try_create_2MB_page(page_num)) {
+        error_report("%s: Failed to allocate hugepage", __func__);
+        goto unlock;
+    }
+
+    for (i = 0; i < page_num; ++i) {
+        char mr_name[32];
+        MemoryRegion *mem2_mr;
+        void *ram;
+
+        ram = mmap(NULL, HUGEPAGE_SIZE, PROT_READ | PROT_WRITE,
+                   MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE | MAP_HUGETLB,
+                   -1, 0);
+        if (ram == MAP_FAILED) {
+            /* ret is still -1 here, so the failure is fatal below. */
+            error_report("%s: mmap failed: %s", __func__, strerror(errno));
+            goto unlock;
+        }
+
+        /* Allocate the region only once the backing page exists. */
+        snprintf(mr_name, sizeof(mr_name), "mem2-%u", (unsigned)i);
+        mem2_mr = g_malloc(sizeof(*mem2_mr));
+        memory_region_init_ram_ptr(mem2_mr, NULL, mr_name, HUGEPAGE_SIZE, ram);
+        memory_region_add_subregion(system_memory,
+                                    ms->ram2_base + (uint64_t)i * HUGEPAGE_SIZE,
+                                    mem2_mr);
+    }
+
+    ret = 0;
+unlock:
+    qemu_unlock_fd(lock_fd, 0, 0);
+    close(lock_fd);
+    if (ret) {
+        exit(EXIT_FAILURE);
+    }
+}
|
|
+
|
|
#define PC_ROM_MIN_VGA 0xc0000
|
|
#define PC_ROM_MIN_OPTION 0xc8000
|
|
#define PC_ROM_MAX 0xe0000
|
|
@@ -965,6 +1070,22 @@ void pc_memory_init(PCMachineState *pcms,
|
|
E820_RAM);
|
|
}
|
|
|
|
+ if (machine->ram2_size && machine->ram2_base) {
|
|
+ if (0x100000000ULL + x86ms->above_4g_mem_size > machine->ram2_base) {
|
|
+ error_report("\"-mem2 'base'\" needs to greater 0x%llx\n",
|
|
+ 0x100000000ULL + x86ms->above_4g_mem_size);
|
|
+ exit(EXIT_FAILURE);
|
|
+ }
|
|
+ if (machine->ram2_base & (HUGEPAGE_SIZE - 1) ||
|
|
+ machine->ram2_size & (HUGEPAGE_SIZE - 1)) {
|
|
+ error_report("\"-mem2 'base|size'\" needs to aligned to 0x%x\n", HUGEPAGE_SIZE);
|
|
+ exit(EXIT_FAILURE);
|
|
+ }
|
|
+
|
|
+ mem2_init(machine, system_memory);
|
|
+ e820_add_entry(machine->ram2_base, machine->ram2_size, E820_RAM);
|
|
+ }
|
|
+
|
|
if (pcms->sgx_epc.size != 0) {
|
|
e820_add_entry(pcms->sgx_epc.base, pcms->sgx_epc.size, E820_RESERVED);
|
|
}
|
|
diff --git a/include/hw/boards.h b/include/hw/boards.h
|
|
index da85f86efb..8ac8cad2a2 100644
|
|
--- a/include/hw/boards.h
|
|
+++ b/include/hw/boards.h
|
|
@@ -389,6 +389,8 @@ struct MachineState {
|
|
|
|
ram_addr_t ram_size;
|
|
ram_addr_t maxram_size;
|
|
+ ram_addr_t ram2_base;
|
|
+ ram_addr_t ram2_size;
|
|
uint64_t ram_slots;
|
|
BootConfiguration boot_config;
|
|
char *kernel_filename;
|
|
diff --git a/qemu-options.hx b/qemu-options.hx
|
|
index 42fd09e4de..bc8e66a037 100644
|
|
--- a/qemu-options.hx
|
|
+++ b/qemu-options.hx
|
|
@@ -5845,6 +5845,18 @@ SRST
|
|
(qemu) qom-set /objects/iothread1 poll-max-ns 100000
|
|
ERST
|
|
|
|
+DEF("mem2", HAS_ARG, QEMU_OPTION_mem2,
|
|
+ "-mem2 base=addr[G],size=n[MG]\n"
|
|
+ " Map guest memory using host hugepages\n"
|
|
+ " base: starting position of guest physical address\n"
|
|
+ " size: the size of mmaped memory\n"
|
|
+ "NOTE: Both `base` and `size` need to be aligned according to 2MB\n",
|
|
+ QEMU_ARCH_I386)
|
|
+SRST
|
|
+``-mem2 base=addr[G],size=n[MG]``
|
|
+ Map the host's large page memory at the specified guest address
|
|
+ so that some devices can use larger contiguous physical memory.
|
|
+ERST
|
|
|
|
HXCOMM This is the last statement. Insert new options before this line!
|
|
|
|
diff --git a/system/vl.c b/system/vl.c
|
|
index 8e3357c578..a1e5e68773 100644
|
|
--- a/system/vl.c
|
|
+++ b/system/vl.c
|
|
@@ -173,6 +173,8 @@ static QemuPluginList plugin_list = QTAILQ_HEAD_INITIALIZER(plugin_list);
|
|
static BlockdevOptionsQueue bdo_queue = QSIMPLEQ_HEAD_INITIALIZER(bdo_queue);
|
|
static bool nographic = false;
|
|
static int mem_prealloc; /* force preallocation of physical target memory */
|
|
+static ram_addr_t ram2_base;
|
|
+static ram_addr_t ram2_size;
|
|
static const char *vga_model = NULL;
|
|
static DisplayOptions dpy;
|
|
static int num_serial_hds;
|
|
@@ -504,6 +506,23 @@ static QemuOptsList qemu_action_opts = {
|
|
},
|
|
};
|
|
|
|
+/*
+ * Option table for "-mem2": the "base" and "size" suboptions are both
+ * parsed as size-typed values.
+ */
+static QemuOptsList qemu_mem2_opts = {
+    .name = "mem2",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_mem2_opts.head),
+    .merge_lists = true,
+    .desc = {
+        { .name = "base", .type = QEMU_OPT_SIZE },
+        { .name = "size", .type = QEMU_OPT_SIZE },
+        { /* end of list */ }
+    },
+};
|
|
+
|
|
const char *qemu_get_vm_name(void)
|
|
{
|
|
return qemu_name;
|
|
@@ -1932,6 +1951,9 @@ static void qemu_apply_machine_options(QDict *qdict)
|
|
{
|
|
object_set_properties_from_keyval(OBJECT(current_machine), qdict, false, &error_fatal);
|
|
|
|
+ current_machine->ram2_size = ram2_size;
|
|
+ current_machine->ram2_base = ram2_base;
|
|
+
|
|
if (semihosting_enabled(false) && !semihosting_get_argc()) {
|
|
/* fall back to the -kernel/-append */
|
|
semihosting_arg_fallback(current_machine->kernel_filename, current_machine->kernel_cmdline);
|
|
@@ -2094,11 +2116,57 @@ static void parse_memory_options(void)
|
|
loc_pop(&loc);
|
|
}
|
|
|
|
+/*
+ * Fetch the size-typed "-mem2" suboption @name from @opts into @value.
+ *
+ * @value is left untouched when the suboption was not given. An empty
+ * value (e.g. "size=") is reported under the suboption's own name and
+ * terminates the process.
+ */
+static void mem2_opt_get_size(QemuOpts *opts, const char *name,
+                              ram_addr_t *value)
+{
+    const char *str = qemu_opt_get(opts, name);
+
+    if (!str) {
+        return;
+    }
+    if (!*str) {
+        error_report("missing '%s' option value", name);
+        exit(EXIT_FAILURE);
+    }
+    *value = qemu_opt_get_size(opts, name, *value);
+}
+
+/*
+ * Read the "-mem2" options into the file-scope ram2_base and ram2_size.
+ *
+ * "base" and "size" must be given together: the process exits with an
+ * error if exactly one of them ends up non-zero. Because the check is
+ * on the values, an explicit base=0 counts as a missing base.
+ */
+static void set_mem2_options(void)
+{
+    QemuOpts *opts = qemu_find_opts_singleton("mem2");
+    Location loc;
+
+    loc_push_none(&loc);
+    qemu_opts_loc_restore(opts);
+
+    mem2_opt_get_size(opts, "base", &ram2_base);
+    mem2_opt_get_size(opts, "size", &ram2_size);
+
+    if (ram2_base && !ram2_size) {
+        error_report("missing 'size' option value");
+        exit(EXIT_FAILURE);
+    }
+    if (!ram2_base && ram2_size) {
+        error_report("missing 'base' option value");
+        exit(EXIT_FAILURE);
+    }
+
+    loc_pop(&loc);
+}
|
|
+
|
|
static void qemu_create_machine(QDict *qdict)
|
|
{
|
|
MachineClass *machine_class = select_machine(qdict, &error_fatal);
|
|
object_set_machine_compat_props(machine_class->compat_props);
|
|
|
|
+ set_mem2_options();
|
|
+
|
|
current_machine = MACHINE(object_new_with_class(OBJECT_CLASS(machine_class)));
|
|
object_property_add_child(object_get_root(), "machine",
|
|
OBJECT(current_machine));
|
|
@@ -2777,6 +2845,7 @@ void qemu_init(int argc, char **argv)
|
|
qemu_add_opts(&qemu_semihosting_config_opts);
|
|
qemu_add_opts(&qemu_fw_cfg_opts);
|
|
qemu_add_opts(&qemu_action_opts);
|
|
+ qemu_add_opts(&qemu_mem2_opts);
|
|
qemu_add_run_with_opts();
|
|
module_call_init(MODULE_INIT_OPTS);
|
|
|
|
@@ -3596,6 +3665,13 @@ void qemu_init(int argc, char **argv)
|
|
case QEMU_OPTION_nouserconfig:
|
|
/* Nothing to be parsed here. Especially, do not error out below. */
|
|
break;
|
|
+ case QEMU_OPTION_mem2:
|
|
+ opts = qemu_opts_parse_noisily(qemu_find_opts("mem2"),
|
|
+ optarg, false);
|
|
+ if (!opts) {
|
|
+ exit(EXIT_FAILURE);
|
|
+ }
|
|
+ break;
|
|
#if defined(CONFIG_POSIX)
|
|
case QEMU_OPTION_runas:
|
|
if (!os_set_runas(optarg)) {
|
|
--
|
|
2.41.0.windows.1
|
|
|