QEMU update to version 6.2.0-72(master)

- migration/xbzrle: fix out-of-bounds write with axv512
- migration/xbzrle: use ctz64 to avoid undefined result
- Update bench-code for addressing CI problem
- AVX512 support for xbzrle_encode_buffer
- configure, meson: move AVX tests to meson
- target/i386: KVM: allow fast string operations if host supports them
- target/i386: add FSRM to TCG
- hw/nvme: fix memory leak in nvme_dsm
- aio-posix: fix race between epoll upgrade and aio_set_fd_handler()
- target/i386: Add SGX aex-notify and EDECCSSA support
- hw/usb/imx: Fix out of bounds access in imx_usbphy_read()
- target/i386: Set maximum APIC ID to KVM prior to vCPU creation
- target/i386: Fix sanity check on max APIC ID / X2APIC enablement

Signed-off-by: Fei Xu <xufei30@huawei.com>
This commit is contained in:
Fei Xu 2023-05-17 16:06:00 +08:00
parent 949b72e703
commit 2ddec1532b
14 changed files with 1981 additions and 1 deletions

View File

@ -0,0 +1,310 @@
From 4d572573175449f48fc12c9f9524fc09f219cdbd Mon Sep 17 00:00:00 2001
From: ling xu <ling1.xu@intel.com>
Date: Wed, 16 Nov 2022 23:29:22 +0800
Subject: [PATCH] AVX512 support for xbzrle_encode_buffer
mainline inclusion
from mainline-v8.0.0-rc0
commit 04ffce137b6d85ab4e7687e54e4dffcef0a9ab99
category: feature
feature: AVX512 support for xbzrle_encode_buffer
bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I6Z50P
Intel-SIG: commit 04ffce137b6d ("AVX512 support for xbzrle_encode_buffer")
-------------------------------------
AVX512 support for xbzrle_encode_buffer
This commit is the same with [PATCH v6 1/2], and provides avx512 support for xbzrle_encode_buffer
function to accelerate xbzrle encoding speed. Runtime check of avx512
support and benchmark for this feature are added. Compared with C
version of xbzrle_encode_buffer function, avx512 version can achieve
50%-70% performance improvement on benchmarking. In addition, if dirty
data is randomly located in 4K page, the avx512 version can achieve
almost 140% performance gain.
Signed-off-by: ling xu <ling1.xu@intel.com>
Co-authored-by: Zhou Zhao <zhou.zhao@intel.com>
Co-authored-by: Jun Jin <jun.i.jin@intel.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Aichun Shi <aichun.shi@intel.com>
---
meson.build | 17 +++++
meson_options.txt | 2 +
migration/ram.c | 34 +++++++++-
migration/xbzrle.c | 124 ++++++++++++++++++++++++++++++++++
migration/xbzrle.h | 4 ++
scripts/meson-buildoptions.sh | 3 +
6 files changed, 181 insertions(+), 3 deletions(-)
diff --git a/meson.build b/meson.build
index 9f77254861..45bc69bf0c 100644
--- a/meson.build
+++ b/meson.build
@@ -1816,6 +1816,22 @@ config_host_data.set('CONFIG_AF_VSOCK', cc.compiles(gnu_source_prefix + '''
return -1;
}'''))
+config_host_data.set('CONFIG_AVX512BW_OPT', get_option('avx512bw') \
+ .require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512BW') \
+ .require(cc.links('''
+ #pragma GCC push_options
+ #pragma GCC target("avx512bw")
+ #include <cpuid.h>
+ #include <immintrin.h>
+ static int bar(void *a) {
+
+ __m512i *x = a;
+ __m512i res= _mm512_abs_epi8(*x);
+ return res[1];
+ }
+ int main(int argc, char *argv[]) { return bar(argv[0]); }
+ '''), error_message: 'AVX512BW not available').allowed())
+
ignored = ['CONFIG_QEMU_INTERP_PREFIX', # actually per-target
'HAVE_GDB_BIN']
arrays = ['CONFIG_BDRV_RW_WHITELIST', 'CONFIG_BDRV_RO_WHITELIST']
@@ -3318,6 +3334,7 @@ summary_info += {'debug stack usage': config_host.has_key('CONFIG_DEBUG_STACK_US
summary_info += {'mutex debugging': config_host.has_key('CONFIG_DEBUG_MUTEX')}
summary_info += {'memory allocator': get_option('malloc')}
summary_info += {'avx2 optimization': config_host_data.get('CONFIG_AVX2_OPT')}
+summary_info += {'avx512bw optimization': config_host_data.get('CONFIG_AVX512BW_OPT')}
summary_info += {'avx512f optimization': config_host_data.get('CONFIG_AVX512F_OPT')}
summary_info += {'gprof enabled': config_host.has_key('CONFIG_GPROF')}
summary_info += {'gcov': get_option('b_coverage')}
diff --git a/meson_options.txt b/meson_options.txt
index e9cbe48cb9..ec9c3c0a05 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -70,6 +70,8 @@ option('avx2', type: 'feature', value: 'auto',
description: 'AVX2 optimizations')
option('avx512f', type: 'feature', value: 'disabled',
description: 'AVX512F optimizations')
+option('avx512bw', type: 'feature', value: 'auto',
+ description: 'AVX512BW optimizations')
option('attr', type : 'feature', value : 'auto',
description: 'attr/xattr support')
diff --git a/migration/ram.c b/migration/ram.c
index c3484ee1a9..a4383954b4 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -91,6 +91,34 @@ static inline bool is_zero_range(uint8_t *p, uint64_t size)
return buffer_is_zero(p, size);
}
+int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int,
+ uint8_t *, int) = xbzrle_encode_buffer;
+#if defined(CONFIG_AVX512BW_OPT)
+#include "qemu/cpuid.h"
+static void __attribute__((constructor)) init_cpu_flag(void)
+{
+ unsigned max = __get_cpuid_max(0, NULL);
+ int a, b, c, d;
+ if (max >= 1) {
+ __cpuid(1, a, b, c, d);
+ /* We must check that AVX is not just available, but usable. */
+ if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
+ int bv;
+ __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
+ __cpuid_count(7, 0, a, b, c, d);
+ /* 0xe6:
+ * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
+ * and ZMM16-ZMM31 state are enabled by OS)
+ * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
+ */
+ if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
+ xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512;
+ }
+ }
+ }
+}
+#endif
+
XBZRLECacheStats xbzrle_counters;
/* struct contains XBZRLE cache and a static page
@@ -1031,9 +1059,9 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
/* XBZRLE encoding (if there is no overflow) */
- encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
- TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
- TARGET_PAGE_SIZE);
+ encoded_len = xbzrle_encode_buffer_func(prev_cached_page, XBZRLE.current_buf,
+ TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
+ TARGET_PAGE_SIZE);
/*
* Update the cache contents, so that it corresponds to the data
diff --git a/migration/xbzrle.c b/migration/xbzrle.c
index 1ba482ded9..05366e86c0 100644
--- a/migration/xbzrle.c
+++ b/migration/xbzrle.c
@@ -174,3 +174,127 @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
return d;
}
+
+#if defined(CONFIG_AVX512BW_OPT)
+#pragma GCC push_options
+#pragma GCC target("avx512bw")
+#include <immintrin.h>
+int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
+ uint8_t *dst, int dlen)
+{
+ uint32_t zrun_len = 0, nzrun_len = 0;
+ int d = 0, i = 0, num = 0;
+ uint8_t *nzrun_start = NULL;
+ /* add 1 to include residual part in main loop */
+ uint32_t count512s = (slen >> 6) + 1;
+ /* countResidual is tail of data, i.e., countResidual = slen % 64 */
+ uint32_t count_residual = slen & 0b111111;
+ bool never_same = true;
+ uint64_t mask_residual = 1;
+ mask_residual <<= count_residual;
+ mask_residual -= 1;
+ __m512i r = _mm512_set1_epi32(0);
+
+ while (count512s) {
+ if (d + 2 > dlen) {
+ return -1;
+ }
+
+ int bytes_to_check = 64;
+ uint64_t mask = 0xffffffffffffffff;
+ if (count512s == 1) {
+ bytes_to_check = count_residual;
+ mask = mask_residual;
+ }
+ __m512i old_data = _mm512_mask_loadu_epi8(r,
+ mask, old_buf + i);
+ __m512i new_data = _mm512_mask_loadu_epi8(r,
+ mask, new_buf + i);
+ uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
+ count512s--;
+
+ bool is_same = (comp & 0x1);
+ while (bytes_to_check) {
+ if (is_same) {
+ if (nzrun_len) {
+ d += uleb128_encode_small(dst + d, nzrun_len);
+ if (d + nzrun_len > dlen) {
+ return -1;
+ }
+ nzrun_start = new_buf + i - nzrun_len;
+ memcpy(dst + d, nzrun_start, nzrun_len);
+ d += nzrun_len;
+ nzrun_len = 0;
+ }
+ /* 64 data at a time for speed */
+ if (count512s && (comp == 0xffffffffffffffff)) {
+ i += 64;
+ zrun_len += 64;
+ break;
+ }
+ never_same = false;
+ num = __builtin_ctzll(~comp);
+ num = (num < bytes_to_check) ? num : bytes_to_check;
+ zrun_len += num;
+ bytes_to_check -= num;
+ comp >>= num;
+ i += num;
+ if (bytes_to_check) {
+ /* still has different data after same data */
+ d += uleb128_encode_small(dst + d, zrun_len);
+ zrun_len = 0;
+ } else {
+ break;
+ }
+ }
+ if (never_same || zrun_len) {
+ /*
+ * never_same only acts if
+ * data begins with diff in first count512s
+ */
+ d += uleb128_encode_small(dst + d, zrun_len);
+ zrun_len = 0;
+ never_same = false;
+ }
+ /* has diff, 64 data at a time for speed */
+ if ((bytes_to_check == 64) && (comp == 0x0)) {
+ i += 64;
+ nzrun_len += 64;
+ break;
+ }
+ num = __builtin_ctzll(comp);
+ num = (num < bytes_to_check) ? num : bytes_to_check;
+ nzrun_len += num;
+ bytes_to_check -= num;
+ comp >>= num;
+ i += num;
+ if (bytes_to_check) {
+ /* mask like 111000 */
+ d += uleb128_encode_small(dst + d, nzrun_len);
+ /* overflow */
+ if (d + nzrun_len > dlen) {
+ return -1;
+ }
+ nzrun_start = new_buf + i - nzrun_len;
+ memcpy(dst + d, nzrun_start, nzrun_len);
+ d += nzrun_len;
+ nzrun_len = 0;
+ is_same = true;
+ }
+ }
+ }
+
+ if (nzrun_len != 0) {
+ d += uleb128_encode_small(dst + d, nzrun_len);
+ /* overflow */
+ if (d + nzrun_len > dlen) {
+ return -1;
+ }
+ nzrun_start = new_buf + i - nzrun_len;
+ memcpy(dst + d, nzrun_start, nzrun_len);
+ d += nzrun_len;
+ }
+ return d;
+}
+#pragma GCC pop_options
+#endif
diff --git a/migration/xbzrle.h b/migration/xbzrle.h
index a0db507b9c..6feb49160a 100644
--- a/migration/xbzrle.h
+++ b/migration/xbzrle.h
@@ -18,4 +18,8 @@ int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
uint8_t *dst, int dlen);
int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
+#if defined(CONFIG_AVX512BW_OPT)
+int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
+ uint8_t *dst, int dlen);
+#endif
#endif
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index b994bf16f0..8c00cce411 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -26,6 +26,7 @@ meson_options_help() {
printf "%s\n" ' attr attr/xattr support'
printf "%s\n" ' auth-pam PAM access control'
printf "%s\n" ' avx2 AVX2 optimizations'
+ printf "%s\n" ' avx512bw AVX512BW optimizations'
printf "%s\n" ' avx512f AVX512F optimizations'
printf "%s\n" ' bpf eBPF support'
printf "%s\n" ' brlapi brlapi character device driver'
@@ -111,6 +112,8 @@ _meson_option_parse() {
--disable-auth-pam) printf "%s" -Dauth_pam=disabled ;;
--enable-avx2) printf "%s" -Davx2=enabled ;;
--disable-avx2) printf "%s" -Davx2=disabled ;;
+ --enable-avx512bw) printf "%s" -Davx512bw=enabled ;;
+ --disable-avx512bw) printf "%s" -Davx512bw=disabled ;;
--enable-avx512f) printf "%s" -Davx512f=enabled ;;
--disable-avx512f) printf "%s" -Davx512f=disabled ;;
--enable-bpf) printf "%s" -Dbpf=enabled ;;
--
2.27.0

View File

@ -0,0 +1,615 @@
From 4fe9da6fdaa5a9a12fdb26bf2a8c5abfccabf9e9 Mon Sep 17 00:00:00 2001
From: ling xu <ling1.xu@intel.com>
Date: Wed, 16 Nov 2022 23:29:23 +0800
Subject: [PATCH] Update bench-code for addressing CI problem
mainline inclusion
from mainline-v8.0.0-rc0
commit cc98c9fd5c17b8ab62ad91b183060d8f70b9d00d
category: feature
feature: AVX512 support for xbzrle_encode_buffer
bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I6Z50P
Intel-SIG: commit cc98c9fd5c17 ("Update bench-code for addressing CI problem")
-------------------------------------
Update bench-code for addressing CI problem
Unit test code is in test-xbzrle.c, and benchmark code is in xbzrle-bench.c
for performance benchmarking. we have modified xbzrle-bench.c to address
CI problem.
Signed-off-by: ling xu <ling1.xu@intel.com>
Co-authored-by: Zhou Zhao <zhou.zhao@intel.com>
Co-authored-by: Jun Jin <jun.i.jin@intel.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Aichun Shi <aichun.shi@intel.com>
---
tests/bench/meson.build | 6 +
tests/bench/xbzrle-bench.c | 469 +++++++++++++++++++++++++++++++++++++
tests/unit/test-xbzrle.c | 39 ++-
3 files changed, 509 insertions(+), 5 deletions(-)
create mode 100644 tests/bench/xbzrle-bench.c
diff --git a/tests/bench/meson.build b/tests/bench/meson.build
index 00b3c209dc..54bc8938a8 100644
--- a/tests/bench/meson.build
+++ b/tests/bench/meson.build
@@ -3,6 +3,12 @@ qht_bench = executable('qht-bench',
sources: 'qht-bench.c',
dependencies: [qemuutil])
+if have_system
+xbzrle_bench = executable('xbzrle-bench',
+ sources: 'xbzrle-bench.c',
+ dependencies: [qemuutil,migration])
+endif
+
executable('atomic_add-bench',
sources: files('atomic_add-bench.c'),
dependencies: [qemuutil],
diff --git a/tests/bench/xbzrle-bench.c b/tests/bench/xbzrle-bench.c
new file mode 100644
index 0000000000..8848a3a32d
--- /dev/null
+++ b/tests/bench/xbzrle-bench.c
@@ -0,0 +1,469 @@
+/*
+ * Xor Based Zero Run Length Encoding unit tests.
+ *
+ * Copyright 2013 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ * Orit Wasserman <owasserm@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "../migration/xbzrle.h"
+
+#if defined(CONFIG_AVX512BW_OPT)
+#define XBZRLE_PAGE_SIZE 4096
+static bool is_cpu_support_avx512bw;
+#include "qemu/cpuid.h"
+static void __attribute__((constructor)) init_cpu_flag(void)
+{
+ unsigned max = __get_cpuid_max(0, NULL);
+ int a, b, c, d;
+ is_cpu_support_avx512bw = false;
+ if (max >= 1) {
+ __cpuid(1, a, b, c, d);
+ /* We must check that AVX is not just available, but usable. */
+ if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
+ int bv;
+ __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
+ __cpuid_count(7, 0, a, b, c, d);
+ /* 0xe6:
+ * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
+ * and ZMM16-ZMM31 state are enabled by OS)
+ * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
+ */
+ if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
+ is_cpu_support_avx512bw = true;
+ }
+ }
+ }
+ return ;
+}
+
+struct ResTime {
+ float t_raw;
+ float t_512;
+};
+
+
+/* Function prototypes
+int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
+ uint8_t *dst, int dlen);
+*/
+static void encode_decode_zero(struct ResTime *res)
+{
+ uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ int i = 0;
+ int dlen = 0, dlen512 = 0;
+ int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
+
+ for (i = diff_len; i > 0; i--) {
+ buffer[1000 + i] = i;
+ buffer512[1000 + i] = i;
+ }
+
+ buffer[1000 + diff_len + 3] = 103;
+ buffer[1000 + diff_len + 5] = 105;
+
+ buffer512[1000 + diff_len + 3] = 103;
+ buffer512[1000 + diff_len + 5] = 105;
+
+ /* encode zero page */
+ time_t t_start, t_end, t_start512, t_end512;
+ t_start = clock();
+ dlen = xbzrle_encode_buffer(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
+ XBZRLE_PAGE_SIZE);
+ t_end = clock();
+ float time_val = difftime(t_end, t_start);
+ g_assert(dlen == 0);
+
+ t_start512 = clock();
+ dlen512 = xbzrle_encode_buffer_avx512(buffer512, buffer512, XBZRLE_PAGE_SIZE,
+ compressed512, XBZRLE_PAGE_SIZE);
+ t_end512 = clock();
+ float time_val512 = difftime(t_end512, t_start512);
+ g_assert(dlen512 == 0);
+
+ res->t_raw = time_val;
+ res->t_512 = time_val512;
+
+ g_free(buffer);
+ g_free(compressed);
+ g_free(buffer512);
+ g_free(compressed512);
+
+}
+
+static void test_encode_decode_zero_avx512(void)
+{
+ int i;
+ float time_raw = 0.0, time_512 = 0.0;
+ struct ResTime res;
+ for (i = 0; i < 10000; i++) {
+ encode_decode_zero(&res);
+ time_raw += res.t_raw;
+ time_512 += res.t_512;
+ }
+ printf("Zero test:\n");
+ printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+ printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_unchanged(struct ResTime *res)
+{
+ uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ int i = 0;
+ int dlen = 0, dlen512 = 0;
+ int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
+
+ for (i = diff_len; i > 0; i--) {
+ test[1000 + i] = i + 4;
+ test512[1000 + i] = i + 4;
+ }
+
+ test[1000 + diff_len + 3] = 107;
+ test[1000 + diff_len + 5] = 109;
+
+ test512[1000 + diff_len + 3] = 107;
+ test512[1000 + diff_len + 5] = 109;
+
+ /* test unchanged buffer */
+ time_t t_start, t_end, t_start512, t_end512;
+ t_start = clock();
+ dlen = xbzrle_encode_buffer(test, test, XBZRLE_PAGE_SIZE, compressed,
+ XBZRLE_PAGE_SIZE);
+ t_end = clock();
+ float time_val = difftime(t_end, t_start);
+ g_assert(dlen == 0);
+
+ t_start512 = clock();
+ dlen512 = xbzrle_encode_buffer_avx512(test512, test512, XBZRLE_PAGE_SIZE,
+ compressed512, XBZRLE_PAGE_SIZE);
+ t_end512 = clock();
+ float time_val512 = difftime(t_end512, t_start512);
+ g_assert(dlen512 == 0);
+
+ res->t_raw = time_val;
+ res->t_512 = time_val512;
+
+ g_free(test);
+ g_free(compressed);
+ g_free(test512);
+ g_free(compressed512);
+
+}
+
+static void test_encode_decode_unchanged_avx512(void)
+{
+ int i;
+ float time_raw = 0.0, time_512 = 0.0;
+ struct ResTime res;
+ for (i = 0; i < 10000; i++) {
+ encode_decode_unchanged(&res);
+ time_raw += res.t_raw;
+ time_512 += res.t_512;
+ }
+ printf("Unchanged test:\n");
+ printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+ printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_1_byte(struct ResTime *res)
+{
+ uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
+ uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
+ int dlen = 0, rc = 0, dlen512 = 0, rc512 = 0;
+ uint8_t buf[2];
+ uint8_t buf512[2];
+
+ test[XBZRLE_PAGE_SIZE - 1] = 1;
+ test512[XBZRLE_PAGE_SIZE - 1] = 1;
+
+ time_t t_start, t_end, t_start512, t_end512;
+ t_start = clock();
+ dlen = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+ XBZRLE_PAGE_SIZE);
+ t_end = clock();
+ float time_val = difftime(t_end, t_start);
+ g_assert(dlen == (uleb128_encode_small(&buf[0], 4095) + 2));
+
+ rc = xbzrle_decode_buffer(compressed, dlen, buffer, XBZRLE_PAGE_SIZE);
+ g_assert(rc == XBZRLE_PAGE_SIZE);
+ g_assert(memcmp(test, buffer, XBZRLE_PAGE_SIZE) == 0);
+
+ t_start512 = clock();
+ dlen512 = xbzrle_encode_buffer_avx512(buffer512, test512, XBZRLE_PAGE_SIZE,
+ compressed512, XBZRLE_PAGE_SIZE);
+ t_end512 = clock();
+ float time_val512 = difftime(t_end512, t_start512);
+ g_assert(dlen512 == (uleb128_encode_small(&buf512[0], 4095) + 2));
+
+ rc512 = xbzrle_decode_buffer(compressed512, dlen512, buffer512,
+ XBZRLE_PAGE_SIZE);
+ g_assert(rc512 == XBZRLE_PAGE_SIZE);
+ g_assert(memcmp(test512, buffer512, XBZRLE_PAGE_SIZE) == 0);
+
+ res->t_raw = time_val;
+ res->t_512 = time_val512;
+
+ g_free(buffer);
+ g_free(compressed);
+ g_free(test);
+ g_free(buffer512);
+ g_free(compressed512);
+ g_free(test512);
+
+}
+
+static void test_encode_decode_1_byte_avx512(void)
+{
+ int i;
+ float time_raw = 0.0, time_512 = 0.0;
+ struct ResTime res;
+ for (i = 0; i < 10000; i++) {
+ encode_decode_1_byte(&res);
+ time_raw += res.t_raw;
+ time_512 += res.t_512;
+ }
+ printf("1 byte test:\n");
+ printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+ printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_overflow(struct ResTime *res)
+{
+ uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ int i = 0, rc = 0, rc512 = 0;
+
+ for (i = 0; i < XBZRLE_PAGE_SIZE / 2 - 1; i++) {
+ test[i * 2] = 1;
+ test512[i * 2] = 1;
+ }
+
+ /* encode overflow */
+ time_t t_start, t_end, t_start512, t_end512;
+ t_start = clock();
+ rc = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+ XBZRLE_PAGE_SIZE);
+ t_end = clock();
+ float time_val = difftime(t_end, t_start);
+ g_assert(rc == -1);
+
+ t_start512 = clock();
+ rc512 = xbzrle_encode_buffer_avx512(buffer512, test512, XBZRLE_PAGE_SIZE,
+ compressed512, XBZRLE_PAGE_SIZE);
+ t_end512 = clock();
+ float time_val512 = difftime(t_end512, t_start512);
+ g_assert(rc512 == -1);
+
+ res->t_raw = time_val;
+ res->t_512 = time_val512;
+
+ g_free(buffer);
+ g_free(compressed);
+ g_free(test);
+ g_free(buffer512);
+ g_free(compressed512);
+ g_free(test512);
+
+}
+
+static void test_encode_decode_overflow_avx512(void)
+{
+ int i;
+ float time_raw = 0.0, time_512 = 0.0;
+ struct ResTime res;
+ for (i = 0; i < 10000; i++) {
+ encode_decode_overflow(&res);
+ time_raw += res.t_raw;
+ time_512 += res.t_512;
+ }
+ printf("Overflow test:\n");
+ printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+ printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_range_avx512(struct ResTime *res)
+{
+ uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
+ uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
+ uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ int i = 0, rc = 0, rc512 = 0;
+ int dlen = 0, dlen512 = 0;
+
+ int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
+
+ for (i = diff_len; i > 0; i--) {
+ buffer[1000 + i] = i;
+ test[1000 + i] = i + 4;
+ buffer512[1000 + i] = i;
+ test512[1000 + i] = i + 4;
+ }
+
+ buffer[1000 + diff_len + 3] = 103;
+ test[1000 + diff_len + 3] = 107;
+
+ buffer[1000 + diff_len + 5] = 105;
+ test[1000 + diff_len + 5] = 109;
+
+ buffer512[1000 + diff_len + 3] = 103;
+ test512[1000 + diff_len + 3] = 107;
+
+ buffer512[1000 + diff_len + 5] = 105;
+ test512[1000 + diff_len + 5] = 109;
+
+ /* test encode/decode */
+ time_t t_start, t_end, t_start512, t_end512;
+ t_start = clock();
+ dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
+ XBZRLE_PAGE_SIZE);
+ t_end = clock();
+ float time_val = difftime(t_end, t_start);
+ rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
+ g_assert(rc < XBZRLE_PAGE_SIZE);
+ g_assert(memcmp(test, buffer, XBZRLE_PAGE_SIZE) == 0);
+
+ t_start512 = clock();
+ dlen512 = xbzrle_encode_buffer_avx512(test512, buffer512, XBZRLE_PAGE_SIZE,
+ compressed512, XBZRLE_PAGE_SIZE);
+ t_end512 = clock();
+ float time_val512 = difftime(t_end512, t_start512);
+ rc512 = xbzrle_decode_buffer(compressed512, dlen512, test512, XBZRLE_PAGE_SIZE);
+ g_assert(rc512 < XBZRLE_PAGE_SIZE);
+ g_assert(memcmp(test512, buffer512, XBZRLE_PAGE_SIZE) == 0);
+
+ res->t_raw = time_val;
+ res->t_512 = time_val512;
+
+ g_free(buffer);
+ g_free(compressed);
+ g_free(test);
+ g_free(buffer512);
+ g_free(compressed512);
+ g_free(test512);
+
+}
+
+static void test_encode_decode_avx512(void)
+{
+ int i;
+ float time_raw = 0.0, time_512 = 0.0;
+ struct ResTime res;
+ for (i = 0; i < 10000; i++) {
+ encode_decode_range_avx512(&res);
+ time_raw += res.t_raw;
+ time_512 += res.t_512;
+ }
+ printf("Encode decode test:\n");
+ printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+ printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_random(struct ResTime *res)
+{
+ uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
+ uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
+ uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ int i = 0, rc = 0, rc512 = 0;
+ int dlen = 0, dlen512 = 0;
+
+ int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1);
+ /* store the index of diff */
+ int dirty_index[diff_len];
+ for (int j = 0; j < diff_len; j++) {
+ dirty_index[j] = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1);
+ }
+ for (i = diff_len - 1; i >= 0; i--) {
+ buffer[dirty_index[i]] = i;
+ test[dirty_index[i]] = i + 4;
+ buffer512[dirty_index[i]] = i;
+ test512[dirty_index[i]] = i + 4;
+ }
+
+ time_t t_start, t_end, t_start512, t_end512;
+ t_start = clock();
+ dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
+ XBZRLE_PAGE_SIZE);
+ t_end = clock();
+ float time_val = difftime(t_end, t_start);
+ rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
+ g_assert(rc < XBZRLE_PAGE_SIZE);
+
+ t_start512 = clock();
+ dlen512 = xbzrle_encode_buffer_avx512(test512, buffer512, XBZRLE_PAGE_SIZE,
+ compressed512, XBZRLE_PAGE_SIZE);
+ t_end512 = clock();
+ float time_val512 = difftime(t_end512, t_start512);
+ rc512 = xbzrle_decode_buffer(compressed512, dlen512, test512, XBZRLE_PAGE_SIZE);
+ g_assert(rc512 < XBZRLE_PAGE_SIZE);
+
+ res->t_raw = time_val;
+ res->t_512 = time_val512;
+
+ g_free(buffer);
+ g_free(compressed);
+ g_free(test);
+ g_free(buffer512);
+ g_free(compressed512);
+ g_free(test512);
+
+}
+
+static void test_encode_decode_random_avx512(void)
+{
+ int i;
+ float time_raw = 0.0, time_512 = 0.0;
+ struct ResTime res;
+ for (i = 0; i < 10000; i++) {
+ encode_decode_random(&res);
+ time_raw += res.t_raw;
+ time_512 += res.t_512;
+ }
+ printf("Random test:\n");
+ printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+ printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+#endif
+
+int main(int argc, char **argv)
+{
+ g_test_init(&argc, &argv, NULL);
+ g_test_rand_int();
+ #if defined(CONFIG_AVX512BW_OPT)
+ if (likely(is_cpu_support_avx512bw)) {
+ g_test_add_func("/xbzrle/encode_decode_zero", test_encode_decode_zero_avx512);
+ g_test_add_func("/xbzrle/encode_decode_unchanged",
+ test_encode_decode_unchanged_avx512);
+ g_test_add_func("/xbzrle/encode_decode_1_byte", test_encode_decode_1_byte_avx512);
+ g_test_add_func("/xbzrle/encode_decode_overflow",
+ test_encode_decode_overflow_avx512);
+ g_test_add_func("/xbzrle/encode_decode", test_encode_decode_avx512);
+ g_test_add_func("/xbzrle/encode_decode_random", test_encode_decode_random_avx512);
+ }
+ #endif
+ return g_test_run();
+}
diff --git a/tests/unit/test-xbzrle.c b/tests/unit/test-xbzrle.c
index 795d6f1cba..baa364b443 100644
--- a/tests/unit/test-xbzrle.c
+++ b/tests/unit/test-xbzrle.c
@@ -17,6 +17,35 @@
#define XBZRLE_PAGE_SIZE 4096
+int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int,
+ uint8_t *, int) = xbzrle_encode_buffer;
+#if defined(CONFIG_AVX512BW_OPT)
+#include "qemu/cpuid.h"
+static void __attribute__((constructor)) init_cpu_flag(void)
+{
+ unsigned max = __get_cpuid_max(0, NULL);
+ int a, b, c, d;
+ if (max >= 1) {
+ __cpuid(1, a, b, c, d);
+ /* We must check that AVX is not just available, but usable. */
+ if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
+ int bv;
+ __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
+ __cpuid_count(7, 0, a, b, c, d);
+ /* 0xe6:
+ * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
+ * and ZMM16-ZMM31 state are enabled by OS)
+ * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
+ */
+ if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
+ xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512;
+ }
+ }
+ }
+ return ;
+}
+#endif
+
static void test_uleb(void)
{
uint32_t i, val;
@@ -55,7 +84,7 @@ static void test_encode_decode_zero(void)
buffer[1000 + diff_len + 5] = 105;
/* encode zero page */
- dlen = xbzrle_encode_buffer(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
+ dlen = xbzrle_encode_buffer_func(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
XBZRLE_PAGE_SIZE);
g_assert(dlen == 0);
@@ -79,7 +108,7 @@ static void test_encode_decode_unchanged(void)
test[1000 + diff_len + 5] = 109;
/* test unchanged buffer */
- dlen = xbzrle_encode_buffer(test, test, XBZRLE_PAGE_SIZE, compressed,
+ dlen = xbzrle_encode_buffer_func(test, test, XBZRLE_PAGE_SIZE, compressed,
XBZRLE_PAGE_SIZE);
g_assert(dlen == 0);
@@ -97,7 +126,7 @@ static void test_encode_decode_1_byte(void)
test[XBZRLE_PAGE_SIZE - 1] = 1;
- dlen = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+ dlen = xbzrle_encode_buffer_func(buffer, test, XBZRLE_PAGE_SIZE, compressed,
XBZRLE_PAGE_SIZE);
g_assert(dlen == (uleb128_encode_small(&buf[0], 4095) + 2));
@@ -122,7 +151,7 @@ static void test_encode_decode_overflow(void)
}
/* encode overflow */
- rc = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+ rc = xbzrle_encode_buffer_func(buffer, test, XBZRLE_PAGE_SIZE, compressed,
XBZRLE_PAGE_SIZE);
g_assert(rc == -1);
@@ -153,7 +182,7 @@ static void encode_decode_range(void)
test[1000 + diff_len + 5] = 109;
/* test encode/decode */
- dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
+ dlen = xbzrle_encode_buffer_func(test, buffer, XBZRLE_PAGE_SIZE, compressed,
XBZRLE_PAGE_SIZE);
rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
--
2.27.0

View File

@ -0,0 +1,82 @@
From 4ab8e11adf5878d1f298a682b37d7de4632a3a8b Mon Sep 17 00:00:00 2001
From: wangmeiyang <wangmeiyang@xfusion.com>
Date: Fri, 28 Apr 2023 15:22:07 +0800
Subject: [PATCH] aio-posix: fix race between epoll upgrade and
aio_set_fd_handler()
If another thread calls aio_set_fd_handler() while the IOThread event
loop is upgrading from ppoll(2) to epoll(7) then we might miss new
AioHandlers. The epollfd will not monitor the new AioHandler's fd,
resulting in hangs.
Take the AioHandler list lock while upgrading to epoll. This prevents
AioHandlers from changing while epoll is being set up. If we cannot lock
because we're in a nested event loop, then don't upgrade to epoll (it
will happen next time we're not in a nested call).
The downside to taking the lock is that the aio_set_fd_handler() thread
has to wait until the epoll upgrade is finished, which involves many
epoll_ctl(2) system calls. However, this scenario is rare and I couldn't
think of another solution that is still simple.
origin commit: https://gitlab.com/qemu-project/qemu/-/commit/e62da98527fa35fe5f532cded01a33edf9fbe7b2
Signed-off-by: Meiyang Wang <wangmeiyang@xfusion.com>
Reported-by: Qing Wang <qinwang@redhat.com>
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2090998
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Fam Zheng <fam@euphon.net>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230323144859.1338495-1-stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
util/fdmon-epoll.c | 25 ++++++++++++++++++-------
1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/util/fdmon-epoll.c b/util/fdmon-epoll.c
index e11a8a022e..1683aa1105 100644
--- a/util/fdmon-epoll.c
+++ b/util/fdmon-epoll.c
@@ -127,6 +127,8 @@ static bool fdmon_epoll_try_enable(AioContext *ctx)
bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd)
{
+ bool ok;
+
if (ctx->epollfd < 0) {
return false;
}
@@ -136,14 +138,23 @@ bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd)
return false;
}
- if (npfd >= EPOLL_ENABLE_THRESHOLD) {
- if (fdmon_epoll_try_enable(ctx)) {
- return true;
- } else {
- fdmon_epoll_disable(ctx);
- }
+ if (npfd < EPOLL_ENABLE_THRESHOLD) {
+ return false;
+ }
+
+ /* The list must not change while we add fds to epoll */
+ if (!qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
+ return false;
+ }
+
+ ok = fdmon_epoll_try_enable(ctx);
+
+ qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
+
+ if (!ok) {
+ fdmon_epoll_disable(ctx);
}
- return false;
+ return ok;
}
void fdmon_epoll_setup(AioContext *ctx)
--
2.27.0

View File

@ -0,0 +1,290 @@
From 54625fb7d039ef746f88ad0bf78515e96af7305d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 8 Nov 2021 13:38:58 +0100
Subject: [PATCH] configure, meson: move AVX tests to meson
mainline inclusion
from mainline-v7.0.0-rc0
commit 622753d2fb501509ab03c241d476815f378d4ba5
category: feature
feature: AVX512 support for xbzrle_encode_buffer
bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I6Z50P
Intel-SIG: commit 622753d2fb50 ("configure, meson: move AVX tests to meson")
-------------------------------------
configure, meson: move AVX tests to meson
For consistency with other tests, --enable-avx2 and --enable-avx512f
fail to compile on x86 systems if cpuid.h is not available.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Aichun Shi <aichun.shi@intel.com>
---
configure | 103 ----------------------------------
meson.build | 50 ++++++++++++++++-
meson_options.txt | 4 ++
scripts/meson-buildoptions.sh | 6 ++
4 files changed, 58 insertions(+), 105 deletions(-)
diff --git a/configure b/configure
index a84dc891cc..d7a4502a8b 100755
--- a/configure
+++ b/configure
@@ -329,8 +329,6 @@ qom_cast_debug="yes"
trace_backends="log"
trace_file="trace"
opengl="$default_feature"
-cpuid_h="no"
-avx2_opt="$default_feature"
guest_agent="$default_feature"
guest_agent_with_vss="no"
guest_agent_ntddscsi="no"
@@ -1053,14 +1051,6 @@ for opt do
;;
--disable-tools) want_tools="no"
;;
- --disable-avx2) avx2_opt="no"
- ;;
- --enable-avx2) avx2_opt="yes"
- ;;
- --disable-avx512f) avx512f_opt="no"
- ;;
- --enable-avx512f) avx512f_opt="yes"
- ;;
--disable-virtio-blk-data-plane|--enable-virtio-blk-data-plane)
echo "$0: $opt is obsolete, virtio-blk data-plane is always on" >&2
;;
@@ -1456,8 +1446,6 @@ cat << EOF
tpm TPM support
libssh ssh block device support
numa libnuma support
- avx2 AVX2 optimization support
- avx512f AVX512F optimization support
replication replication support
opengl opengl support
xfsctl xfsctl support
@@ -2893,85 +2881,6 @@ else # "$safe_stack" = ""
fi
fi
-########################################
-# check if cpuid.h is usable.
-
-cat > $TMPC << EOF
-#include <cpuid.h>
-int main(void) {
- unsigned a, b, c, d;
- int max = __get_cpuid_max(0, 0);
-
- if (max >= 1) {
- __cpuid(1, a, b, c, d);
- }
-
- if (max >= 7) {
- __cpuid_count(7, 0, a, b, c, d);
- }
-
- return 0;
-}
-EOF
-if compile_prog "" "" ; then
- cpuid_h=yes
-fi
-
-##########################################
-# avx2 optimization requirement check
-#
-# There is no point enabling this if cpuid.h is not usable,
-# since we won't be able to select the new routines.
-
-if test "$cpuid_h" = "yes" && test "$avx2_opt" != "no"; then
- cat > $TMPC << EOF
-#pragma GCC push_options
-#pragma GCC target("avx2")
-#include <cpuid.h>
-#include <immintrin.h>
-static int bar(void *a) {
- __m256i x = *(__m256i *)a;
- return _mm256_testz_si256(x, x);
-}
-int main(int argc, char *argv[]) { return bar(argv[0]); }
-EOF
- if compile_object "-Werror" ; then
- avx2_opt="yes"
- else
- avx2_opt="no"
- fi
-fi
-
-##########################################
-# avx512f optimization requirement check
-#
-# There is no point enabling this if cpuid.h is not usable,
-# since we won't be able to select the new routines.
-# by default, it is turned off.
-# if user explicitly want to enable it, check environment
-
-if test "$cpuid_h" = "yes" && test "$avx512f_opt" = "yes"; then
- cat > $TMPC << EOF
-#pragma GCC push_options
-#pragma GCC target("avx512f")
-#include <cpuid.h>
-#include <immintrin.h>
-static int bar(void *a) {
- __m512i x = *(__m512i *)a;
- return _mm512_test_epi64_mask(x, x);
-}
-int main(int argc, char *argv[])
-{
- return bar(argv[0]);
-}
-EOF
- if ! compile_object "-Werror" ; then
- avx512f_opt="no"
- fi
-else
- avx512f_opt="no"
-fi
-
########################################
# check if __[u]int128_t is usable.
@@ -3587,14 +3496,6 @@ if test "$opengl" = "yes" ; then
echo "OPENGL_LIBS=$opengl_libs" >> $config_host_mak
fi
-if test "$avx2_opt" = "yes" ; then
- echo "CONFIG_AVX2_OPT=y" >> $config_host_mak
-fi
-
-if test "$avx512f_opt" = "yes" ; then
- echo "CONFIG_AVX512F_OPT=y" >> $config_host_mak
-fi
-
# XXX: suppress that
if [ "$bsd" = "yes" ] ; then
echo "CONFIG_BSD=y" >> $config_host_mak
@@ -3627,10 +3528,6 @@ if test "$have_tsan" = "yes" && test "$have_tsan_iface_fiber" = "yes" ; then
echo "CONFIG_TSAN=y" >> $config_host_mak
fi
-if test "$cpuid_h" = "yes" ; then
- echo "CONFIG_CPUID_H=y" >> $config_host_mak
-fi
-
if test "$int128" = "yes" ; then
echo "CONFIG_INT128=y" >> $config_host_mak
fi
diff --git a/meson.build b/meson.build
index d80426b3e8..9f77254861 100644
--- a/meson.build
+++ b/meson.build
@@ -1750,6 +1750,52 @@ config_host_data.set('CONFIG_GETAUXVAL', cc.links(gnu_source_prefix + '''
return getauxval(AT_HWCAP) == 0;
}'''))
+have_cpuid_h = cc.links('''
+ #include <cpuid.h>
+ int main(void) {
+ unsigned a, b, c, d;
+ unsigned max = __get_cpuid_max(0, 0);
+
+ if (max >= 1) {
+ __cpuid(1, a, b, c, d);
+ }
+
+ if (max >= 7) {
+ __cpuid_count(7, 0, a, b, c, d);
+ }
+
+ return 0;
+ }''')
+config_host_data.set('CONFIG_CPUID_H', have_cpuid_h)
+
+config_host_data.set('CONFIG_AVX2_OPT', get_option('avx2') \
+ .require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX2') \
+ .require(cc.links('''
+ #pragma GCC push_options
+ #pragma GCC target("avx2")
+ #include <cpuid.h>
+ #include <immintrin.h>
+ static int bar(void *a) {
+ __m256i x = *(__m256i *)a;
+ return _mm256_testz_si256(x, x);
+ }
+ int main(int argc, char *argv[]) { return bar(argv[0]); }
+ '''), error_message: 'AVX2 not available').allowed())
+
+config_host_data.set('CONFIG_AVX512F_OPT', get_option('avx512f') \
+ .require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512F') \
+ .require(cc.links('''
+ #pragma GCC push_options
+ #pragma GCC target("avx512f")
+ #include <cpuid.h>
+ #include <immintrin.h>
+ static int bar(void *a) {
+ __m512i x = *(__m512i *)a;
+ return _mm512_test_epi64_mask(x, x);
+ }
+ int main(int argc, char *argv[]) { return bar(argv[0]); }
+ '''), error_message: 'AVX512F not available').allowed())
+
config_host_data.set('CONFIG_AF_VSOCK', cc.compiles(gnu_source_prefix + '''
#include <errno.h>
#include <sys/types.h>
@@ -3271,8 +3317,8 @@ summary_info += {'membarrier': config_host.has_key('CONFIG_MEMBARRIER')}
summary_info += {'debug stack usage': config_host.has_key('CONFIG_DEBUG_STACK_USAGE')}
summary_info += {'mutex debugging': config_host.has_key('CONFIG_DEBUG_MUTEX')}
summary_info += {'memory allocator': get_option('malloc')}
-summary_info += {'avx2 optimization': config_host.has_key('CONFIG_AVX2_OPT')}
-summary_info += {'avx512f optimization': config_host.has_key('CONFIG_AVX512F_OPT')}
+summary_info += {'avx2 optimization': config_host_data.get('CONFIG_AVX2_OPT')}
+summary_info += {'avx512f optimization': config_host_data.get('CONFIG_AVX512F_OPT')}
summary_info += {'gprof enabled': config_host.has_key('CONFIG_GPROF')}
summary_info += {'gcov': get_option('b_coverage')}
summary_info += {'thread sanitizer': config_host.has_key('CONFIG_TSAN')}
diff --git a/meson_options.txt b/meson_options.txt
index e392323732..e9cbe48cb9 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -66,6 +66,10 @@ option('cfi_debug', type: 'boolean', value: 'false',
description: 'Verbose errors in case of CFI violation')
option('multiprocess', type: 'feature', value: 'auto',
description: 'Out of process device emulation support')
+option('avx2', type: 'feature', value: 'auto',
+ description: 'AVX2 optimizations')
+option('avx512f', type: 'feature', value: 'disabled',
+ description: 'AVX512F optimizations')
option('attr', type : 'feature', value : 'auto',
description: 'attr/xattr support')
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 7a17ff4218..b994bf16f0 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -25,6 +25,8 @@ meson_options_help() {
printf "%s\n" ' alsa ALSA sound support'
printf "%s\n" ' attr attr/xattr support'
printf "%s\n" ' auth-pam PAM access control'
+ printf "%s\n" ' avx2 AVX2 optimizations'
+ printf "%s\n" ' avx512f AVX512F optimizations'
printf "%s\n" ' bpf eBPF support'
printf "%s\n" ' brlapi brlapi character device driver'
printf "%s\n" ' bzip2 bzip2 support for DMG images'
@@ -107,6 +109,10 @@ _meson_option_parse() {
--disable-attr) printf "%s" -Dattr=disabled ;;
--enable-auth-pam) printf "%s" -Dauth_pam=enabled ;;
--disable-auth-pam) printf "%s" -Dauth_pam=disabled ;;
+ --enable-avx2) printf "%s" -Davx2=enabled ;;
+ --disable-avx2) printf "%s" -Davx2=disabled ;;
+ --enable-avx512f) printf "%s" -Davx512f=enabled ;;
+ --disable-avx512f) printf "%s" -Davx512f=disabled ;;
--enable-bpf) printf "%s" -Dbpf=enabled ;;
--disable-bpf) printf "%s" -Dbpf=disabled ;;
--enable-brlapi) printf "%s" -Dbrlapi=enabled ;;
--
2.27.0

View File

@ -0,0 +1,40 @@
From 2a3757a66aad487b64afb8935015c408fd9fdcbb Mon Sep 17 00:00:00 2001
From: wangmeiyang <wangmeiyang@xfusion.com>
Date: Fri, 28 Apr 2023 12:01:45 +0800
Subject: [PATCH] hw/nvme: fix memory leak in nvme_dsm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The iocb (and the allocated memory to hold LBA ranges) leaks if reading
the LBA ranges fails.
Fix this by adding a free and an unref of the iocb.
origin commit: https://gitlab.com/qemu-project/qemu/-/commit/4b32319cdacd99be983e1a74128289ef52c5964e
Signed-off-by: Meiyang Wang <wangmeiyang@xfusion.com>
Reported-by: Coverity (CID 1508281)
Fixes: d7d1474fd85d ("hw/nvme: reimplement dsm to allow cancellation")
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
hw/nvme/ctrl.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 40fbda3b03..5f1515828b 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -2381,6 +2381,9 @@ static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req)
status = nvme_h2c(n, (uint8_t *)iocb->range, sizeof(NvmeDsmRange) * nr,
req);
if (status) {
+ g_free(iocb->range);
+ qemu_aio_unref(iocb);
+
return status;
}
--
2.27.0

View File

@ -0,0 +1,72 @@
From b8822efafc2012de3e92700afc7524df027c914b Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Thu, 16 Mar 2023 16:49:26 -0700
Subject: [PATCH] hw/usb/imx: Fix out of bounds access in imx_usbphy_read()
The i.MX USB Phy driver does not check register ranges, resulting in out of
bounds accesses if an attempt is made to access non-existing PHY registers.
Add range check and conditionally report bad accesses to fix the problem.
While at it, also conditionally log attempted writes to non-existing or
read-only registers.
Reported-by: Qiang Liu <cyruscyliu@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Qiang Liu <cyruscyliu@gmail.com>
Message-id: 20230316234926.208874-1-linux@roeck-us.net
Link: https://gitlab.com/qemu-project/qemu/-/issues/1408
Fixes: 0701a5efa015 ("hw/usb: Add basic i.MX USB Phy support")
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
hw/usb/imx-usb-phy.c | 19 +++++++++++++++++--
1 file changed, 17 insertions(+), 2 deletions(-)
diff --git a/hw/usb/imx-usb-phy.c b/hw/usb/imx-usb-phy.c
index 5d7a549e34..1a97b36a11 100644
--- a/hw/usb/imx-usb-phy.c
+++ b/hw/usb/imx-usb-phy.c
@@ -13,6 +13,7 @@
#include "qemu/osdep.h"
#include "hw/usb/imx-usb-phy.h"
#include "migration/vmstate.h"
+#include "qemu/log.h"
#include "qemu/module.h"
static const VMStateDescription vmstate_imx_usbphy = {
@@ -90,7 +91,15 @@ static uint64_t imx_usbphy_read(void *opaque, hwaddr offset, unsigned size)
value = s->usbphy[index - 3];
break;
default:
- value = s->usbphy[index];
+ if (index < USBPHY_MAX) {
+ value = s->usbphy[index];
+ } else {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: Read from non-existing USB PHY register 0x%"
+ HWADDR_PRIx "\n",
+ __func__, offset);
+ value = 0;
+ }
break;
}
return (uint64_t)value;
@@ -168,7 +177,13 @@ static void imx_usbphy_write(void *opaque, hwaddr offset, uint64_t value,
s->usbphy[index - 3] ^= value;
break;
default:
- /* Other registers are read-only */
+ /* Other registers are read-only or do not exist */
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: Write to %s USB PHY register 0x%"
+ HWADDR_PRIx "\n",
+ __func__,
+ index >= USBPHY_MAX ? "non-existing" : "read-only",
+ offset);
break;
}
}
--
2.27.0

View File

@ -0,0 +1,81 @@
From 553baa5eac50560c14ed216744062f542df17011 Mon Sep 17 00:00:00 2001
From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com>
Date: Mon, 13 Mar 2023 15:58:20 -0300
Subject: [PATCH] migration/xbzrle: fix out-of-bounds write with axv512
mainline inclusion
from mainline-v8.0.0-rc1
commit 1776b70f55c75541e9cab3423650a59b085162a9
category: feature
feature: AVX512 support for xbzrle_encode_buffer
bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I6Z50P
Intel-SIG: commit 1776b70f55c7 ("migration/xbzrle: fix out-of-bounds write with axv512")
-------------------------------------
migration/xbzrle: fix out-of-bounds write with axv512
xbzrle_encode_buffer_avx512() checks for overflows too scarcely in its
outer loop, causing out-of-bounds writes:
$ ../configure --target-list=aarch64-softmmu --enable-sanitizers --enable-avx512bw
$ make tests/unit/test-xbzrle && ./tests/unit/test-xbzrle
==5518==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x62100000b100 at pc 0x561109a7714d bp 0x7ffed712a440 sp 0x7ffed712a430
WRITE of size 1 at 0x62100000b100 thread T0
#0 0x561109a7714c in uleb128_encode_small ../util/cutils.c:831
#1 0x561109b67f6a in xbzrle_encode_buffer_avx512 ../migration/xbzrle.c:275
#2 0x5611099a7428 in test_encode_decode_overflow ../tests/unit/test-xbzrle.c:153
#3 0x7fb2fb65a58d (/lib/x86_64-linux-gnu/libglib-2.0.so.0+0x7a58d)
#4 0x7fb2fb65a333 (/lib/x86_64-linux-gnu/libglib-2.0.so.0+0x7a333)
#5 0x7fb2fb65aa79 in g_test_run_suite (/lib/x86_64-linux-gnu/libglib-2.0.so.0+0x7aa79)
#6 0x7fb2fb65aa94 in g_test_run (/lib/x86_64-linux-gnu/libglib-2.0.so.0+0x7aa94)
#7 0x5611099a3a23 in main ../tests/unit/test-xbzrle.c:218
#8 0x7fb2fa78c082 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x24082)
#9 0x5611099a608d in _start (/qemu/build/tests/unit/test-xbzrle+0x28408d)
0x62100000b100 is located 0 bytes to the right of 4096-byte region [0x62100000a100,0x62100000b100)
allocated by thread T0 here:
#0 0x7fb2fb823a06 in __interceptor_calloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cc:153
#1 0x7fb2fb637ef0 in g_malloc0 (/lib/x86_64-linux-gnu/libglib-2.0.so.0+0x57ef0)
Fix that by performing the overflow check in the inner loop, instead.
Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Aichun Shi <aichun.shi@intel.com>
---
migration/xbzrle.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/migration/xbzrle.c b/migration/xbzrle.c
index 21b92d4eae..c6f8b20917 100644
--- a/migration/xbzrle.c
+++ b/migration/xbzrle.c
@@ -197,10 +197,6 @@ int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
__m512i r = _mm512_set1_epi32(0);
while (count512s) {
- if (d + 2 > dlen) {
- return -1;
- }
-
int bytes_to_check = 64;
uint64_t mask = 0xffffffffffffffff;
if (count512s == 1) {
@@ -216,6 +212,9 @@ int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
bool is_same = (comp & 0x1);
while (bytes_to_check) {
+ if (d + 2 > dlen) {
+ return -1;
+ }
if (is_same) {
if (nzrun_len) {
d += uleb128_encode_small(dst + d, nzrun_len);
--
2.27.0

View File

@ -0,0 +1,69 @@
From d4c03c1e41043f25e21889762bceb480abb56634 Mon Sep 17 00:00:00 2001
From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com>
Date: Mon, 13 Mar 2023 15:58:19 -0300
Subject: [PATCH] migration/xbzrle: use ctz64 to avoid undefined result
mainline inclusion
from mainline-v8.0.0-rc1
commit d84a78d15d3af9ff28ceec6906a4b101bd545b55
category: feature
feature: AVX512 support for xbzrle_encode_buffer
bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I6Z50P
Intel-SIG: commit d84a78d15d3a ("migration/xbzrle: use ctz64 to avoid undefined result")
-------------------------------------
migration/xbzrle: use ctz64 to avoid undefined result
__builtin_ctzll() produces undefined results when the argument is 0.
This can be seen through test-xbzrle, which produces the following
warning:
../migration/xbzrle.c:265: runtime error: passing zero to ctz(), which is not a valid argument
Replace __builtin_ctzll() with our ctz64() wrapper which properly
handles 0.
Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Aichun Shi <aichun.shi@intel.com>
---
migration/xbzrle.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/migration/xbzrle.c b/migration/xbzrle.c
index 05366e86c0..21b92d4eae 100644
--- a/migration/xbzrle.c
+++ b/migration/xbzrle.c
@@ -12,6 +12,7 @@
*/
#include "qemu/osdep.h"
#include "qemu/cutils.h"
+#include "qemu/host-utils.h"
#include "xbzrle.h"
/*
@@ -233,7 +234,7 @@ int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
break;
}
never_same = false;
- num = __builtin_ctzll(~comp);
+ num = ctz64(~comp);
num = (num < bytes_to_check) ? num : bytes_to_check;
zrun_len += num;
bytes_to_check -= num;
@@ -262,7 +263,7 @@ int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
nzrun_len += 64;
break;
}
- num = __builtin_ctzll(comp);
+ num = ctz64(comp);
num = (num < bytes_to_check) ? num : bytes_to_check;
nzrun_len += num;
bytes_to_check -= num;
--
2.27.0

View File

@ -3,7 +3,7 @@
Name: qemu
Version: 6.2.0
Release: 71
Release: 72
Epoch: 10
Summary: QEMU is a generic and open source machine emulator and virtualizer
License: GPLv2 and BSD and MIT and CC-BY-SA-4.0
@ -489,6 +489,19 @@ Patch0474: block-rbd-workaround-for-ceph-issue-53784.patch
Patch0475: target-i386-add-FZRM-FSRS-FSRC.patch
Patch0476: i386-Add-new-CPU-model-SapphireRapids.patch
Patch0477: core-cpu-common-Fix-the-wrong-ifdef-__aarch64__.patch
Patch0478: target-i386-Fix-sanity-check-on-max-APIC-ID-X2APIC-e.patch
Patch0479: target-i386-Set-maximum-APIC-ID-to-KVM-prior-to-vCPU.patch
Patch0480: hw-usb-imx-Fix-out-of-bounds-access-in-imx_usbphy_re.patch
Patch0481: target-i386-Add-SGX-aex-notify-and-EDECCSSA-support.patch
Patch0482: aio-posix-fix-race-between-epoll-upgrade-and-aio_set.patch
Patch0483: hw-nvme-fix-memory-leak-in-nvme_dsm.patch
Patch0484: target-i386-add-FSRM-to-TCG.patch
Patch0485: target-i386-KVM-allow-fast-string-operations-if-host.patch
Patch0486: configure-meson-move-AVX-tests-to-meson.patch
Patch0487: AVX512-support-for-xbzrle_encode_buffer.patch
Patch0488: Update-bench-code-for-addressing-CI-problem.patch
Patch0489: migration-xbzrle-use-ctz64-to-avoid-undefined-result.patch
Patch0490: migration-xbzrle-fix-out-of-bounds-write-with-axv512.patch
BuildRequires: flex
BuildRequires: gcc
@ -1063,6 +1076,21 @@ getent passwd qemu >/dev/null || \
%endif
%changelog
* Wed May 17 2023 <xufei30@huawei.com> - 10:6.2.0-72
- migration/xbzrle: fix out-of-bounds write with axv512
- migration/xbzrle: use ctz64 to avoid undefined result
- Update bench-code for addressing CI problem
- AVX512 support for xbzrle_encode_buffer
- configure, meson: move AVX tests to meson
- target/i386: KVM: allow fast string operations if host supports them
- target/i386: add FSRM to TCG
- hw/nvme: fix memory leak in nvme_dsm
- aio-posix: fix race between epoll upgrade and aio_set_fd_handler()
- target/i386: Add SGX aex-notify and EDECCSSA support
- hw/usb/imx: Fix out of bounds access in imx_usbphy_read()
- target/i386: Set maximum APIC ID to KVM prior to vCPU creation
- target/i386: Fix sanity check on max APIC ID / X2APIC enablement
* Sat Apr 22 2023 <xufei30@huawei.com> - 10:6.2.0-71
- vhost-user-blk: fix the resize crash
- plugins: make qemu_plugin_user_exit's locking order consistent with fork_start's

View File

@ -0,0 +1,64 @@
From b4657a1cf12f3a0a650498d87f4e91aae76cc840 Mon Sep 17 00:00:00 2001
From: Kai Huang <kai.huang@intel.com>
Date: Wed, 9 Nov 2022 15:48:34 +1300
Subject: [PATCH] target/i386: Add SGX aex-notify and EDECCSSA support
from mainline-v8.0.0-rc0
commit d45f24fe7525d8a8aaa4ca6d9d214dc41819caa5
category: feature
feature: SGX aex-notify and EDECCSSA support
bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I6Y4W4
Intel-SIG: commit d45f24fe7525 ("target/i386: Add SGX aex-notify and EDECCSSA support")
-----------------------------------------------------------
The new SGX Asynchronous Exit (AEX) notification mechanism (AEX-notify)
allows one enclave to receive a notification in the ERESUME after the
enclave exit due to an AEX. EDECCSSA is a new SGX user leaf function
(ENCLU[EDECCSSA]) to facilitate the AEX notification handling.
Whether the hardware supports to create enclave with AEX-notify support
is enumerated via CPUID.(EAX=0x12,ECX=0x1):EAX[10]. The new EDECCSSA
user leaf function is enumerated via CPUID.(EAX=0x12,ECX=0x0):EAX[11].
Add support to allow to expose the new SGX AEX-notify feature and the
new EDECCSSA user leaf function to KVM guest.
Link: https://lore.kernel.org/lkml/166760360549.4906.809756297092548496.tip-bot2@tip-bot2/
Link: https://lore.kernel.org/lkml/166760360934.4906.2427175408052308969.tip-bot2@tip-bot2/
Reviewed-by: Yang Zhong <yang.zhong@linux.intel.com>
Signed-off-by: Kai Huang <kai.huang@intel.com>
Message-Id: <20221109024834.172705-1-kai.huang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[ jason: amend commit log ]
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
---
target/i386/cpu.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 61cd7abcaa..df475f27d3 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1205,7 +1205,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
.feat_names = {
"sgx1", "sgx2", NULL, NULL,
NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, "sgx-edeccssa",
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
@@ -1245,7 +1245,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
.feat_names = {
NULL, "sgx-debug", "sgx-mode64", NULL,
"sgx-provisionkey", "sgx-tokenkey", NULL, "sgx-kss",
- NULL, NULL, NULL, NULL,
+ NULL, NULL, "sgx-aex-notify", NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
--
2.27.0

View File

@ -0,0 +1,109 @@
From 197ebfabf4319c3dff79f06822d98304df2a3110 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Mon, 14 Mar 2022 14:25:41 +0000
Subject: [PATCH] target/i386: Fix sanity check on max APIC ID / X2APIC
enablement
from mainline-v7.1.0-rc0
commit dc89f32d92bba795b0665f075b78d8881cf67ab3
category: feature
feature: Optimization of IPI virtualization
bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I6Y34T
Intel-SIG: commit dc89f32d92bb ("target/i386: Fix sanity check on max APIC ID / X2APIC enablement")
------------------------------------------------
The check on x86ms->apic_id_limit in pc_machine_done() had two problems.
Firstly, we need KVM to support the X2APIC API in order to allow IRQ
delivery to APICs >= 255. So we need to call/check kvm_enable_x2apic(),
which was done elsewhere in *some* cases but not all.
Secondly, microvm needs the same check. So move it from pc_machine_done()
to x86_cpus_init() where it will work for both.
The check in kvm_cpu_instance_init() is now redundant and can be dropped.
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Acked-by: Claudio Fontana <cfontana@suse.de>
Message-Id: <20220314142544.150555-1-dwmw2@infradead.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
[ jason: amend commit log ]
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
---
hw/i386/pc.c | 8 --------
hw/i386/x86.c | 16 ++++++++++++++++
target/i386/kvm/kvm-cpu.c | 2 +-
3 files changed, 17 insertions(+), 9 deletions(-)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 4870ce0f96..c5f430f83d 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -736,14 +736,6 @@ void pc_machine_done(Notifier *notifier, void *data)
/* update FW_CFG_NB_CPUS to account for -device added CPUs */
fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
}
-
-
- if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
- !kvm_irqchip_in_kernel()) {
- error_report("current -smp configuration requires kernel "
- "irqchip support.");
- exit(EXIT_FAILURE);
- }
}
void pc_guest_info_init(PCMachineState *pcms)
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index b84840a1bb..f64639b873 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -39,6 +39,7 @@
#include "sysemu/replay.h"
#include "sysemu/sysemu.h"
#include "sysemu/cpu-timers.h"
+#include "sysemu/xen.h"
#include "trace.h"
#include "hw/i386/x86.h"
@@ -136,6 +137,21 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version)
*/
x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms,
ms->smp.max_cpus - 1) + 1;
+
+ /*
+ * Can we support APIC ID 255 or higher?
+ *
+ * Under Xen: yes.
+ * With userspace emulated lapic: no
+ * With KVM's in-kernel lapic: only if X2APIC API is enabled.
+ */
+ if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
+ (!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) {
+ error_report("current -smp configuration requires kernel "
+ "irqchip and X2APIC API support.");
+ exit(EXIT_FAILURE);
+ }
+
possible_cpus = mc->possible_cpu_arch_ids(ms);
for (i = 0; i < ms->smp.cpus; i++) {
x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
index 5eb955ce9a..7237378a7d 100644
--- a/target/i386/kvm/kvm-cpu.c
+++ b/target/i386/kvm/kvm-cpu.c
@@ -171,7 +171,7 @@ static void kvm_cpu_instance_init(CPUState *cs)
/* only applies to builtin_x86_defs cpus */
if (!kvm_irqchip_in_kernel()) {
x86_cpu_change_kvm_default("x2apic", "off");
- } else if (kvm_irqchip_is_split() && kvm_enable_x2apic()) {
+ } else if (kvm_irqchip_is_split()) {
x86_cpu_change_kvm_default("kvm-msi-ext-dest-id", "on");
}
--
2.27.0

View File

@ -0,0 +1,81 @@
From 52ee6f565f4b4a0ca3325e94dcb44ce68ca61eee Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 27 Feb 2023 10:41:46 +0100
Subject: [PATCH] target/i386: KVM: allow fast string operations if host
supports them
mainline inclusion
from mainline-v8.0.0-rc0
commit 3023c9b4d1092eb27a523c08d9e78cbaec67b59b
category: feature
feature: Intel fast REP string operations support
bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I6ZGIX
Intel-SIG: commit 3023c9b4d109 ("target/i386: KVM: allow fast string operations if host supports them")
-------------------------------------
target/i386: KVM: allow fast string operations if host supports them
These are just a flag that documents the performance characteristic of
an instruction; it needs no hypervisor support. So include them even
if KVM does not show them. In particular, FZRM/FSRS/FSRC have only
been added very recently, but they are available on Sapphire Rapids
processors.
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Aichun Shi <aichun.shi@intel.com>
---
target/i386/kvm/kvm.c | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index b8257e7e5f..6fa3bd9694 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -350,7 +350,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
{
struct kvm_cpuid2 *cpuid;
uint32_t ret = 0;
- uint32_t cpuid_1_edx;
+ uint32_t cpuid_1_edx, unused;
uint64_t bitmask;
cpuid = get_supported_cpuid(s);
@@ -397,10 +397,20 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
} else if (function == 6 && reg == R_EAX) {
ret |= CPUID_6_EAX_ARAT; /* safe to allow because of emulated APIC */
} else if (function == 7 && index == 0 && reg == R_EBX) {
+ /* Not new instructions, just an optimization. */
+ uint32_t ebx;
+ host_cpuid(7, 0, &unused, &ebx, &unused, &unused);
+ ret |= ebx & CPUID_7_0_EBX_ERMS;
+
if (host_tsx_broken()) {
ret &= ~(CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_HLE);
}
} else if (function == 7 && index == 0 && reg == R_EDX) {
+ /* Not new instructions, just an optimization. */
+ uint32_t edx;
+ host_cpuid(7, 0, &unused, &unused, &unused, &edx);
+ ret |= edx & CPUID_7_0_EDX_FSRM;
+
/*
* Linux v4.17-v4.20 incorrectly return ARCH_CAPABILITIES on SVM hosts.
* We can detect the bug by checking if MSR_IA32_ARCH_CAPABILITIES is
@@ -409,6 +419,11 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
if (!has_msr_arch_capabs) {
ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES;
}
+ } else if (function == 7 && index == 1 && reg == R_EAX) {
+ /* Not new instructions, just an optimization. */
+ uint32_t eax;
+ host_cpuid(7, 1, &eax, &unused, &unused, &unused);
+ ret |= eax & (CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | CPUID_7_1_EAX_FSRC);
} else if (function == 0xd && index == 0 &&
(reg == R_EAX || reg == R_EDX)) {
/*
--
2.27.0

View File

@ -0,0 +1,95 @@
From db3e0a8dd430a11e8dde6aee4e1f9cca4af0e015 Mon Sep 17 00:00:00 2001
From: Zeng Guang <guang.zeng@intel.com>
Date: Thu, 25 Aug 2022 10:52:46 +0800
Subject: [PATCH] target/i386: Set maximum APIC ID to KVM prior to vCPU
creation
from mainline-v7.2.0-rc0
commit 19e2a9fb9da067acba95b3be83588bda5a3f6a99
category: feature
feature: Optimization of IPI virtualization
bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I6Y34T
Intel-SIG: commit 19e2a9fb9da0 ("target/i386: Set maximum APIC ID to KVM prior to vCPU creation")
------------------------------------------------
Specify maximum possible APIC ID assigned for current VM session to KVM
prior to the creation of vCPUs. By this setting, KVM can set up VM-scoped
data structure indexed by the APIC ID, e.g. Posted-Interrupt Descriptor
pointer table to support Intel IPI virtualization, with the most optimal
memory footprint.
It can be achieved by calling KVM_ENABLE_CAP for KVM_CAP_MAX_VCPU_ID
capability once KVM has enabled it. Ignoring the return error if KVM
doesn't support this capability yet.
Signed-off-by: Zeng Guang <guang.zeng@intel.com>
Acked-by: Peter Xu <peterx@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Message-Id: <20220825025246.26618-1-guang.zeng@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[ jason: amend commit log ]
Signed-off-by: Jason Zeng <jason.zeng@intel.com>
---
hw/i386/x86.c | 4 ++++
target/i386/kvm/kvm-stub.c | 5 +++++
target/i386/kvm/kvm.c | 5 +++++
target/i386/kvm/kvm_i386.h | 2 ++
4 files changed, 16 insertions(+)
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index f64639b873..a3258d78fa 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -152,6 +152,10 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version)
exit(EXIT_FAILURE);
}
+ if (kvm_enabled()) {
+ kvm_set_max_apic_id(x86ms->apic_id_limit);
+ }
+
possible_cpus = mc->possible_cpu_arch_ids(ms);
for (i = 0; i < ms->smp.cpus; i++) {
x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
diff --git a/target/i386/kvm/kvm-stub.c b/target/i386/kvm/kvm-stub.c
index f6e7e4466e..e052f1c7b0 100644
--- a/target/i386/kvm/kvm-stub.c
+++ b/target/i386/kvm/kvm-stub.c
@@ -44,3 +44,8 @@ bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp)
{
abort();
}
+
+void kvm_set_max_apic_id(uint32_t max_apic_id)
+{
+ return;
+}
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index b8257e7e5f..7212ed98a9 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -5270,3 +5270,8 @@ void kvm_arch_accel_class_init(ObjectClass *oc)
"Clock cycles without an event window "
"after which a notification VM exit occurs");
}
+
+void kvm_set_max_apic_id(uint32_t max_apic_id)
+{
+ kvm_vm_enable_cap(kvm_state, KVM_CAP_MAX_VCPU_ID, 0, max_apic_id);
+}
diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
index 4124912c20..58590138e5 100644
--- a/target/i386/kvm/kvm_i386.h
+++ b/target/i386/kvm/kvm_i386.h
@@ -54,4 +54,6 @@ uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address);
bool kvm_enable_sgx_provisioning(KVMState *s);
void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask);
+void kvm_set_max_apic_id(uint32_t max_apic_id);
+
#endif
--
2.27.0

View File

@ -0,0 +1,44 @@
From 2a2b5f93c2ee2071eb32c65f925974d02c11808d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 27 Feb 2023 10:57:09 +0100
Subject: [PATCH] target/i386: add FSRM to TCG
mainline inclusion
from mainline-v8.0.0-rc0
commit c0728d4e3d23356691e4182eac54c67e1ca26618
category: feature
feature: Intel fast REP string operations support
bugzilla: https://gitee.com/openeuler/intel-qemu/issues/I6ZGIX
Intel-SIG: commit c0728d4e3d23 ("target/i386: add FSRM to TCG")
-------------------------------------
target/i386: add FSRM to TCG
Fast short REP MOVS can be added to TCG, since a trivial translation
of string operation is a good option for short lengths.
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Aichun Shi <aichun.shi@intel.com>
---
target/i386/cpu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 61cd7abcaa..13dcd4c720 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -660,7 +660,7 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
#define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | \
/* CPUID_7_0_ECX_OSPKE is dynamic */ \
CPUID_7_0_ECX_LA57 | CPUID_7_0_ECX_PKS)
-#define TCG_7_0_EDX_FEATURES 0
+#define TCG_7_0_EDX_FEATURES CPUID_7_0_EDX_FSRM
#define TCG_7_1_EAX_FEATURES (CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | \
CPUID_7_1_EAX_FSRC)
#define TCG_APM_FEATURES 0
--
2.27.0