commit 9a683b2a2978162394c3c4375622c8ee44f30ae5 Author: overweight <5324761+overweight@user.noreply.gitee.com> Date: Mon Sep 30 11:15:46 2019 -0400 Package init diff --git a/99-qemu-guest-agent.rules b/99-qemu-guest-agent.rules new file mode 100644 index 0000000..8a290ab --- /dev/null +++ b/99-qemu-guest-agent.rules @@ -0,0 +1,2 @@ +SUBSYSTEM=="virtio-ports", ATTR{name}=="org.qemu.guest_agent.0", \ + TAG+="systemd" ENV{SYSTEMD_WANTS}="qemu-guest-agent.service" diff --git a/ARM64-record-vtimer-tick-when-cpu-is-stopped.patch b/ARM64-record-vtimer-tick-when-cpu-is-stopped.patch new file mode 100644 index 0000000..c4c2a2c --- /dev/null +++ b/ARM64-record-vtimer-tick-when-cpu-is-stopped.patch @@ -0,0 +1,135 @@ +From b341b4c1b514d1ca41b39bdf4d4bc4b0267350a3 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Mon, 29 Jul 2019 15:47:27 +0800 +Subject: [PATCH] ARM64: record vtimer tick when cpu is stopped + +The vtimer tick still increases even if the vcpu is stopped when VM has +save/restore or suspend/resume operation. This will cause guest watchdog +soft-lockup if the VM has lots of memory in use. 
+ +Signed-off-by: Hao Hong +Signed-off-by: Haibin Wang +Signed-off-by: Ying Fang +--- + cpus.c | 59 ++++++++++++++++++++++++++++++++++++++++++++ + target/arm/cpu.h | 2 ++ + target/arm/machine.c | 1 + + 3 files changed, 62 insertions(+) + +diff --git a/cpus.c b/cpus.c +index e83f72b4..94c3b8ca 100644 +--- a/cpus.c ++++ b/cpus.c +@@ -1063,6 +1063,28 @@ void cpu_synchronize_all_pre_loadvm(void) + } + } + ++#ifdef __aarch64__ ++static void get_vcpu_timer_tick(CPUState *cs) ++{ ++ CPUARMState *env = &ARM_CPU(cs)->env; ++ int err; ++ struct kvm_one_reg reg; ++ uint64_t timer_tick; ++ ++ reg.id = KVM_REG_ARM_TIMER_CNT; ++ reg.addr = (uintptr_t) &timer_tick; ++ ++ err = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg); ++ if (err < 0) { ++ error_report("get vcpu tick failed, ret = %d", err); ++ env->vtimer = 0; ++ return; ++ } ++ env->vtimer = timer_tick; ++ return; ++} ++#endif ++ + static int do_vm_stop(RunState state, bool send_stop) + { + int ret = 0; +@@ -1070,6 +1092,11 @@ static int do_vm_stop(RunState state, bool send_stop) + if (runstate_is_running()) { + cpu_disable_ticks(); + pause_all_vcpus(); ++#ifdef __aarch64__ ++ if (first_cpu) { ++ get_vcpu_timer_tick(first_cpu); ++ } ++#endif + runstate_set(state); + vm_state_notify(0, state); + if (send_stop) { +@@ -1909,11 +1936,43 @@ void cpu_resume(CPUState *cpu) + qemu_cpu_kick(cpu); + } + ++#ifdef __aarch64__ ++static void set_vcpu_timer_tick(CPUState *cs) ++{ ++ CPUARMState *env = &ARM_CPU(cs)->env; ++ ++ if (env->vtimer == 0) { ++ error_report("Do not set vcpu tick."); ++ return; ++ } ++ ++ int err; ++ struct kvm_one_reg reg; ++ uint64_t timer_tick = env->vtimer; ++ env->vtimer = 0; ++ ++ reg.id = KVM_REG_ARM_TIMER_CNT; ++ reg.addr = (uintptr_t) &timer_tick; ++ ++ err = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg); ++ if (err < 0) { ++ error_report("Set vcpu tick failed, ret = %d", err); ++ return; ++ } ++ return; ++} ++#endif ++ + void resume_all_vcpus(void) + { + CPUState *cpu; + + qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true); 
++#ifdef __aarch64__ ++ if (first_cpu) { ++ set_vcpu_timer_tick(first_cpu); ++ } ++#endif + CPU_FOREACH(cpu) { + cpu_resume(cpu); + } +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index d4d28369..e107e395 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -270,6 +270,8 @@ typedef struct CPUARMState { + uint64_t elr_el[4]; /* AArch64 exception link regs */ + uint64_t sp_el[4]; /* AArch64 banked stack pointers */ + ++ uint64_t vtimer; /* Timer tick when vcpu stop */ ++ + /* System control coprocessor (cp15) */ + struct { + uint32_t c0_cpuid; +diff --git a/target/arm/machine.c b/target/arm/machine.c +index b2925496..d64a0057 100644 +--- a/target/arm/machine.c ++++ b/target/arm/machine.c +@@ -792,6 +792,7 @@ const VMStateDescription vmstate_arm_cpu = { + VMSTATE_UINT32(env.exception.syndrome, ARMCPU), + VMSTATE_UINT32(env.exception.fsr, ARMCPU), + VMSTATE_UINT64(env.exception.vaddress, ARMCPU), ++ VMSTATE_UINT64(env.vtimer, ARMCPU), + VMSTATE_TIMER_PTR(gt_timer[GTIMER_PHYS], ARMCPU), + VMSTATE_TIMER_PTR(gt_timer[GTIMER_VIRT], ARMCPU), + { +-- +2.19.1 + diff --git a/Revert-Enable-build-and-install-of-our-rST-docs.patch b/Revert-Enable-build-and-install-of-our-rST-docs.patch new file mode 100644 index 0000000..e357273 --- /dev/null +++ b/Revert-Enable-build-and-install-of-our-rST-docs.patch @@ -0,0 +1,1306 @@ +From 4ed2ab47ab39de39f9753074b79303638dac020d Mon Sep 17 00:00:00 2001 +From: zhanghailiang +Date: Wed, 17 Jul 2019 09:51:03 +0800 +Subject: [PATCH] Revert "Enable build and install of our rST docs" + +--- + .gitignore | 1 - + MAINTAINERS | 6 - + Makefile | 88 +++--------- + configure | 15 +- + docs/conf.py | 216 ----------------------------- + docs/cpu-hotplug.rst | 2 +- + docs/devel/conf.py | 15 -- + docs/devel/index.rst | 22 --- + docs/devel/memory.rst | 363 ------------------------------------------------- + docs/devel/memory.txt | 351 +++++++++++++++++++++++++++++++++++++++++++++++ + docs/index.rst | 15 -- + docs/interop/conf.py | 15 -- + 
docs/interop/index.rst | 18 --- + 13 files changed, 374 insertions(+), 753 deletions(-) + delete mode 100644 docs/conf.py + delete mode 100644 docs/devel/conf.py + delete mode 100644 docs/devel/index.rst + delete mode 100644 docs/devel/memory.rst + create mode 100644 docs/devel/memory.txt + delete mode 100644 docs/index.rst + delete mode 100644 docs/interop/conf.py + delete mode 100644 docs/interop/index.rst + +diff --git a/.gitignore b/.gitignore +index 8f78221..7385d5c 100644 +--- a/.gitignore ++++ b/.gitignore +@@ -1,4 +1,3 @@ +-/.doctrees + /config-devices.* + /config-all-devices.* + /config-all-disas.* +diff --git a/MAINTAINERS b/MAINTAINERS +index 56139ac..a162586 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -2586,9 +2586,3 @@ GIT submodules + M: Daniel P. Berrange + S: Odd Fixes + F: scripts/git-submodule.sh +- +-Sphinx documentation configuration and build machinery +-M: Peter Maydell +-S: Maintained +-F: docs/conf.py +-F: docs/*/conf.py +diff --git a/Makefile b/Makefile +index 04a0d45..d1e465f 100644 +--- a/Makefile ++++ b/Makefile +@@ -87,20 +87,6 @@ endif + + include $(SRC_PATH)/rules.mak + +-# Create QEMU_PKGVERSION and FULL_VERSION strings +-# If PKGVERSION is set, use that; otherwise get version and -dirty status from git +-QEMU_PKGVERSION := $(if $(PKGVERSION),$(PKGVERSION),$(shell \ +- cd $(SRC_PATH); \ +- if test -e .git; then \ +- git describe --match 'v*' 2>/dev/null | tr -d '\n'; \ +- if ! 
git diff-index --quiet HEAD &>/dev/null; then \ +- echo "-dirty"; \ +- fi; \ +- fi)) +- +-# Either "version (pkgversion)", or just "version" if pkgversion not set +-FULL_VERSION := $(if $(QEMU_PKGVERSION),$(VERSION) ($(QEMU_PKGVERSION)),$(VERSION)) +- + GENERATED_FILES = qemu-version.h config-host.h qemu-options.def + + GENERATED_QAPI_FILES = qapi/qapi-builtin-types.h qapi/qapi-builtin-types.c +@@ -419,12 +405,27 @@ dummy := $(call unnest-vars,, \ + + include $(SRC_PATH)/tests/Makefile.include + +-all: $(DOCS) $(if $(BUILD_DOCS),sphinxdocs) $(TOOLS) $(HELPERS-y) recurse-all modules ++all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all modules + + qemu-version.h: FORCE + $(call quiet-command, \ +- (printf '#define QEMU_PKGVERSION "$(QEMU_PKGVERSION)"\n'; \ +- printf '#define QEMU_FULL_VERSION "$(FULL_VERSION)"\n'; \ ++ (cd $(SRC_PATH); \ ++ if test -n "$(PKGVERSION)"; then \ ++ pkgvers="$(PKGVERSION)"; \ ++ else \ ++ if test -d .git; then \ ++ pkgvers=$$(git describe --match 'v*' 2>/dev/null | tr -d '\n');\ ++ if ! git diff-index --quiet HEAD &>/dev/null; then \ ++ pkgvers="$${pkgvers}-dirty"; \ ++ fi; \ ++ fi; \ ++ fi; \ ++ printf "#define QEMU_PKGVERSION \"$${pkgvers}\"\n"; \ ++ if test -n "$${pkgvers}"; then \ ++ printf '#define QEMU_FULL_VERSION QEMU_VERSION " (" QEMU_PKGVERSION ")"\n'; \ ++ else \ ++ printf '#define QEMU_FULL_VERSION QEMU_VERSION\n'; \ ++ fi; \ + ) > $@.tmp) + $(call quiet-command, if ! cmp -s $@ $@.tmp; then \ + mv $@.tmp $@; \ +@@ -656,22 +657,6 @@ dist: qemu-$(VERSION).tar.bz2 + qemu-%.tar.bz2: + $(SRC_PATH)/scripts/make-release "$(SRC_PATH)" "$(patsubst qemu-%.tar.bz2,%,$@)" + +-# Sphinx does not allow building manuals into the same directory as +-# the source files, so if we're doing an in-tree QEMU build we must +-# build the manuals into a subdirectory (and then install them from +-# there for 'make install'). For an out-of-tree build we can just +-# use the docs/ subdirectory in the build tree as normal. 
+-ifeq ($(realpath $(SRC_PATH)),$(realpath .)) +-MANUAL_BUILDDIR := docs/built +-else +-MANUAL_BUILDDIR := docs +-endif +- +-define clean-manual = +-rm -rf $(MANUAL_BUILDDIR)/$1/_static +-rm -f $(MANUAL_BUILDDIR)/$1/objects.inv $(MANUAL_BUILDDIR)/$1/searchindex.js $(MANUAL_BUILDDIR)/$1/*.html +-endef +- + distclean: clean + rm -f config-host.mak config-host.h* config-host.ld $(DOCS) qemu-options.texi qemu-img-cmds.texi qemu-monitor.texi qemu-monitor-info.texi + rm -f config-all-devices.mak config-all-disas.mak config.status +@@ -692,9 +677,6 @@ distclean: clean + rm -f docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html + rm -f docs/qemu-block-drivers.7 + rm -f docs/qemu-cpu-models.7 +- rm -rf .doctrees +- $(call clean-manual,devel) +- $(call clean-manual,interop) + for d in $(TARGET_DIRS); do \ + rm -rf $$d || exit 1 ; \ + done +@@ -728,20 +710,7 @@ else + BLOBS= + endif + +-# Note that we manually filter-out the non-Sphinx documentation which +-# is currently built into the docs/interop directory in the build tree. +-define install-manual = +-for d in $$(cd $(MANUAL_BUILDDIR) && find $1 -type d); do $(INSTALL_DIR) "$(DESTDIR)$(qemu_docdir)/$$d"; done +-for f in $$(cd $(MANUAL_BUILDDIR) && find $1 -type f -a '!' '(' -name 'qemu-*-qapi.*' -o -name 'qemu-*-ref.*' ')' ); do $(INSTALL_DATA) "$(MANUAL_BUILDDIR)/$$f" "$(DESTDIR)$(qemu_docdir)/$$f"; done +-endef +- +-# Note that we deliberately do not install the "devel" manual: it is +-# for QEMU developers, and not interesting to our users. 
+-.PHONY: install-sphinxdocs +-install-sphinxdocs: sphinxdocs +- $(call install-manual,interop) +- +-install-doc: $(DOCS) install-sphinxdocs ++install-doc: $(DOCS) + $(INSTALL_DIR) "$(DESTDIR)$(qemu_docdir)" + $(INSTALL_DATA) qemu-doc.html "$(DESTDIR)$(qemu_docdir)" + $(INSTALL_DATA) qemu-doc.txt "$(DESTDIR)$(qemu_docdir)" +@@ -892,23 +861,6 @@ docs/version.texi: $(SRC_PATH)/VERSION + %.pdf: %.texi docs/version.texi + $(call quiet-command,texi2pdf $(TEXI2PDFFLAGS) $< -o $@,"GEN","$@") + +-# Sphinx builds all its documentation at once in one invocation +-# and handles "don't rebuild things unless necessary" itself. +-# The '.doctrees' files are cached information to speed this up. +-.PHONY: sphinxdocs +-sphinxdocs: $(MANUAL_BUILDDIR)/devel/index.html $(MANUAL_BUILDDIR)/interop/index.html +- +-# Canned command to build a single manual +-build-manual = $(call quiet-command,sphinx-build $(if $(V),,-q) -b html -D version=$(VERSION) -D release="$(FULL_VERSION)" -d .doctrees/$1 $(SRC_PATH)/docs/$1 $(MANUAL_BUILDDIR)/$1 ,"SPHINX","$(MANUAL_BUILDDIR)/$1") +-# We assume all RST files in the manual's directory are used in it +-manual-deps = $(wildcard $(SRC_PATH)/docs/$1/*.rst) $(SRC_PATH)/docs/$1/conf.py $(SRC_PATH)/docs/conf.py +- +-$(MANUAL_BUILDDIR)/devel/index.html: $(call manual-deps,devel) +- $(call build-manual,devel) +- +-$(MANUAL_BUILDDIR)/interop/index.html: $(call manual-deps,interop) +- $(call build-manual,interop) +- + qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool + $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@") + +@@ -937,7 +889,7 @@ docs/qemu-block-drivers.7: docs/qemu-block-drivers.texi + docs/qemu-cpu-models.7: docs/qemu-cpu-models.texi + scripts/qemu-trace-stap.1: scripts/qemu-trace-stap.texi + +-html: qemu-doc.html docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html sphinxdocs ++html: qemu-doc.html docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html + info: qemu-doc.info 
docs/interop/qemu-qmp-ref.info docs/interop/qemu-ga-ref.info + pdf: qemu-doc.pdf docs/interop/qemu-qmp-ref.pdf docs/interop/qemu-ga-ref.pdf + txt: qemu-doc.txt docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt +diff --git a/configure b/configure +index 1c563a7..384a241 100755 +--- a/configure ++++ b/configure +@@ -4708,24 +4708,13 @@ if compile_prog "" "" ; then + syncfs=yes + fi + +-# Check we have a new enough version of sphinx-build +-has_sphinx_build() { +- # This is a bit awkward but works: create a trivial document and +- # try to run it with our configuration file (which enforces a +- # version requirement). This will fail if either +- # sphinx-build doesn't exist at all or if it is too old. +- mkdir -p "$TMPDIR1/sphinx" +- touch "$TMPDIR1/sphinx/index.rst" +- sphinx-build -c "$source_path/docs" -b html "$TMPDIR1/sphinx" "$TMPDIR1/sphinx/out" >/dev/null 2>&1 +-} +- + # Check if tools are available to build documentation. + if test "$docs" != "no" ; then +- if has makeinfo && has pod2man && has_sphinx_build; then ++ if has makeinfo && has pod2man; then + docs=yes + else + if test "$docs" = "yes" ; then +- feature_not_found "docs" "Install texinfo, Perl/perl-podlators and python-sphinx" ++ feature_not_found "docs" "Install texinfo and Perl/perl-podlators" + fi + docs=no + fi +diff --git a/docs/conf.py b/docs/conf.py +deleted file mode 100644 +index befbcc6..0000000 +--- a/docs/conf.py ++++ /dev/null +@@ -1,216 +0,0 @@ +-# -*- coding: utf-8 -*- +-# +-# QEMU documentation build configuration file, created by +-# sphinx-quickstart on Thu Jan 31 16:40:14 2019. +-# +-# This config file can be used in one of two ways: +-# (1) as a common config file which is included by the conf.py +-# for each of QEMU's manuals: in this case sphinx-build is run multiple +-# times, once per subdirectory. 
+-# (2) as a top level conf file which will result in building all +-# the manuals into a single document: in this case sphinx-build is +-# run once, on the top-level docs directory. +-# +-# QEMU's makefiles take option (1), which allows us to install +-# only the ones the user cares about (in particular we don't want +-# to ship the 'devel' manual to end-users). +-# Third-party sites such as readthedocs.org will take option (2). +-# +-# +-# This file is execfile()d with the current directory set to its +-# containing dir. +-# +-# Note that not all possible configuration values are present in this +-# autogenerated file. +-# +-# All configuration values have a default; values that are commented out +-# serve to show the default. +- +-import os +-import sys +- +-# The per-manual conf.py will set qemu_docdir for a single-manual build; +-# otherwise set it here if this is an entire-manual-set build. +-# This is always the absolute path of the docs/ directory in the source tree. +-try: +- qemu_docdir +-except NameError: +- qemu_docdir = os.path.abspath(".") +- +-# If extensions (or modules to document with autodoc) are in another directory, +-# add these directories to sys.path here. If the directory is relative to the +-# documentation root, use an absolute path starting from qemu_docdir. +-# +-# sys.path.insert(0, os.path.join(qemu_docdir, "my_subdir")) +- +- +-# -- General configuration ------------------------------------------------ +- +-# If your documentation needs a minimal Sphinx version, state it here. +-# +-# 1.3 is where the 'alabaster' theme was shipped with Sphinx. +-needs_sphinx = '1.3' +- +-# Add any Sphinx extension module names here, as strings. They can be +-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +-# ones. +-extensions = [] +- +-# Add any paths that contain templates here, relative to this directory. +-templates_path = ['_templates'] +- +-# The suffix(es) of source filenames. 
+-# You can specify multiple suffix as a list of string: +-# +-# source_suffix = ['.rst', '.md'] +-source_suffix = '.rst' +- +-# The master toctree document. +-master_doc = 'index' +- +-# General information about the project. +-project = u'QEMU' +-copyright = u'2019, The QEMU Project Developers' +-author = u'The QEMU Project Developers' +- +-# The version info for the project you're documenting, acts as replacement for +-# |version| and |release|, also used in various other places throughout the +-# built documents. +- +-# Extract this information from the VERSION file, for the benefit of +-# standalone Sphinx runs as used by readthedocs.org. Builds run from +-# the Makefile will pass version and release on the sphinx-build +-# command line, which override this. +-try: +- extracted_version = None +- with open(os.path.join(qemu_docdir, '../VERSION')) as f: +- extracted_version = f.readline().strip() +-except: +- pass +-finally: +- if extracted_version: +- version = release = extracted_version +- else: +- version = release = "unknown version" +- +-# The language for content autogenerated by Sphinx. Refer to documentation +-# for a list of supported languages. +-# +-# This is also used if you do content translation via gettext catalogs. +-# Usually you set "language" from the command line for these cases. +-language = None +- +-# List of patterns, relative to source directory, that match files and +-# directories to ignore when looking for source files. +-# This patterns also effect to html_static_path and html_extra_path +-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +- +-# The name of the Pygments (syntax highlighting) style to use. +-pygments_style = 'sphinx' +- +-# If true, `todo` and `todoList` produce output, else they produce nothing. +-todo_include_todos = False +- +-# Sphinx defaults to warning about use of :option: for options not defined +-# with "option::" in the document being processed. Turn that off. 
+-suppress_warnings = ["ref.option"] +- +-# -- Options for HTML output ---------------------------------------------- +- +-# The theme to use for HTML and HTML Help pages. See the documentation for +-# a list of builtin themes. +-# +-html_theme = 'alabaster' +- +-# Theme options are theme-specific and customize the look and feel of a theme +-# further. For a list of options available for each theme, see the +-# documentation. +-# We initialize this to empty here, so the per-manual conf.py can just +-# add individual key/value entries. +-html_theme_options = { +-} +- +-# Add any paths that contain custom static files (such as style sheets) here, +-# relative to this directory. They are copied after the builtin static files, +-# so a file named "default.css" will overwrite the builtin "default.css". +-# QEMU doesn't yet have any static files, so comment this out so we don't +-# get a warning about a missing directory. +-# If we do ever add this then it would probably be better to call the +-# subdirectory sphinx_static, as the Linux kernel does. +-# html_static_path = ['_static'] +- +-# Custom sidebar templates, must be a dictionary that maps document names +-# to template names. +-# +-# This is required for the alabaster theme +-# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars +-html_sidebars = { +- '**': [ +- 'about.html', +- 'navigation.html', +- 'searchbox.html', +- ] +-} +- +-# Don't copy the rST source files to the HTML output directory, +-# and don't put links to the sources into the output HTML. +-html_copy_source = False +- +-# -- Options for HTMLHelp output ------------------------------------------ +- +-# Output file base name for HTML help builder. +-htmlhelp_basename = 'QEMUdoc' +- +- +-# -- Options for LaTeX output --------------------------------------------- +- +-latex_elements = { +- # The paper size ('letterpaper' or 'a4paper'). +- # +- # 'papersize': 'letterpaper', +- +- # The font size ('10pt', '11pt' or '12pt'). 
+- # +- # 'pointsize': '10pt', +- +- # Additional stuff for the LaTeX preamble. +- # +- # 'preamble': '', +- +- # Latex figure (float) alignment +- # +- # 'figure_align': 'htbp', +-} +- +-# Grouping the document tree into LaTeX files. List of tuples +-# (source start file, target name, title, +-# author, documentclass [howto, manual, or own class]). +-latex_documents = [ +- (master_doc, 'QEMU.tex', u'QEMU Documentation', +- u'The QEMU Project Developers', 'manual'), +-] +- +- +-# -- Options for manual page output --------------------------------------- +- +-# One entry per manual page. List of tuples +-# (source start file, name, description, authors, manual section). +-man_pages = [ +- (master_doc, 'qemu', u'QEMU Documentation', +- [author], 1) +-] +- +- +-# -- Options for Texinfo output ------------------------------------------- +- +-# Grouping the document tree into Texinfo files. List of tuples +-# (source start file, target name, title, author, +-# dir menu entry, description, category) +-texinfo_documents = [ +- (master_doc, 'QEMU', u'QEMU Documentation', +- author, 'QEMU', 'One line description of project.', +- 'Miscellaneous'), +-] +- +- +- +diff --git a/docs/cpu-hotplug.rst b/docs/cpu-hotplug.rst +index d0b0640..cfeb79f 100644 +--- a/docs/cpu-hotplug.rst ++++ b/docs/cpu-hotplug.rst +@@ -60,7 +60,7 @@ vCPU hotplug + hot-plugged (no "qom-path" member). From its output in step (3), we + can see that ``IvyBridge-IBRS-x86_64-cpu`` is present in socket 0, + while hot-plugging a CPU into socket 1 requires passing the listed +- properties to QMP ``device_add``:: ++ properties to QMP ``device_add``: + + (QEMU) device_add id=cpu-2 driver=IvyBridge-IBRS-x86_64-cpu socket-id=1 core-id=0 thread-id=0 + { +diff --git a/docs/devel/conf.py b/docs/devel/conf.py +deleted file mode 100644 +index 7441f87..0000000 +--- a/docs/devel/conf.py ++++ /dev/null +@@ -1,15 +0,0 @@ +-# -*- coding: utf-8 -*- +-# +-# QEMU documentation build configuration file for the 'devel' manual. 
+-# +-# This includes the top level conf file and then makes any necessary tweaks. +-import sys +-import os +- +-qemu_docdir = os.path.abspath("..") +-parent_config = os.path.join(qemu_docdir, "conf.py") +-exec(compile(open(parent_config, "rb").read(), parent_config, 'exec')) +- +-# This slightly misuses the 'description', but is the best way to get +-# the manual title to appear in the sidebar. +-html_theme_options['description'] = u'Developer''s Guide' +diff --git a/docs/devel/index.rst b/docs/devel/index.rst +deleted file mode 100644 +index ebbab63..0000000 +--- a/docs/devel/index.rst ++++ /dev/null +@@ -1,22 +0,0 @@ +-.. This is the top level page for the 'devel' manual. +- +- +-QEMU Developer's Guide +-====================== +- +-This manual documents various parts of the internals of QEMU. +-You only need to read it if you are interested in reading or +-modifying QEMU's source code. +- +-Contents: +- +-.. toctree:: +- :maxdepth: 2 +- +- kconfig +- loads-stores +- memory +- migration +- stable-process +- testing +- decodetree +diff --git a/docs/devel/memory.rst b/docs/devel/memory.rst +deleted file mode 100644 +index b6a4c37..0000000 +--- a/docs/devel/memory.rst ++++ /dev/null +@@ -1,363 +0,0 @@ +-============== +-The memory API +-============== +- +-The memory API models the memory and I/O buses and controllers of a QEMU +-machine. It attempts to allow modelling of: +- +-- ordinary RAM +-- memory-mapped I/O (MMIO) +-- memory controllers that can dynamically reroute physical memory regions +- to different destinations +- +-The memory model provides support for +- +-- tracking RAM changes by the guest +-- setting up coalesced memory for kvm +-- setting up ioeventfd regions for kvm +- +-Memory is modelled as an acyclic graph of MemoryRegion objects. Sinks +-(leaves) are RAM and MMIO regions, while other nodes represent +-buses, memory controllers, and memory regions that have been rerouted. 
+- +-In addition to MemoryRegion objects, the memory API provides AddressSpace +-objects for every root and possibly for intermediate MemoryRegions too. +-These represent memory as seen from the CPU or a device's viewpoint. +- +-Types of regions +----------------- +- +-There are multiple types of memory regions (all represented by a single C type +-MemoryRegion): +- +-- RAM: a RAM region is simply a range of host memory that can be made available +- to the guest. +- You typically initialize these with memory_region_init_ram(). Some special +- purposes require the variants memory_region_init_resizeable_ram(), +- memory_region_init_ram_from_file(), or memory_region_init_ram_ptr(). +- +-- MMIO: a range of guest memory that is implemented by host callbacks; +- each read or write causes a callback to be called on the host. +- You initialize these with memory_region_init_io(), passing it a +- MemoryRegionOps structure describing the callbacks. +- +-- ROM: a ROM memory region works like RAM for reads (directly accessing +- a region of host memory), and forbids writes. You initialize these with +- memory_region_init_rom(). +- +-- ROM device: a ROM device memory region works like RAM for reads +- (directly accessing a region of host memory), but like MMIO for +- writes (invoking a callback). You initialize these with +- memory_region_init_rom_device(). +- +-- IOMMU region: an IOMMU region translates addresses of accesses made to it +- and forwards them to some other target memory region. As the name suggests, +- these are only needed for modelling an IOMMU, not for simple devices. +- You initialize these with memory_region_init_iommu(). +- +-- container: a container simply includes other memory regions, each at +- a different offset. Containers are useful for grouping several regions +- into one unit. For example, a PCI BAR may be composed of a RAM region +- and an MMIO region. +- +- A container's subregions are usually non-overlapping. 
In some cases it is +- useful to have overlapping regions; for example a memory controller that +- can overlay a subregion of RAM with MMIO or ROM, or a PCI controller +- that does not prevent card from claiming overlapping BARs. +- +- You initialize a pure container with memory_region_init(). +- +-- alias: a subsection of another region. Aliases allow a region to be +- split apart into discontiguous regions. Examples of uses are memory banks +- used when the guest address space is smaller than the amount of RAM +- addressed, or a memory controller that splits main memory to expose a "PCI +- hole". Aliases may point to any type of region, including other aliases, +- but an alias may not point back to itself, directly or indirectly. +- You initialize these with memory_region_init_alias(). +- +-- reservation region: a reservation region is primarily for debugging. +- It claims I/O space that is not supposed to be handled by QEMU itself. +- The typical use is to track parts of the address space which will be +- handled by the host kernel when KVM is enabled. You initialize these +- by passing a NULL callback parameter to memory_region_init_io(). +- +-It is valid to add subregions to a region which is not a pure container +-(that is, to an MMIO, RAM or ROM region). This means that the region +-will act like a container, except that any addresses within the container's +-region which are not claimed by any subregion are handled by the +-container itself (ie by its MMIO callbacks or RAM backing). However +-it is generally possible to achieve the same effect with a pure container +-one of whose subregions is a low priority "background" region covering +-the whole address range; this is often clearer and is preferred. +-Subregions cannot be added to an alias region. +- +-Migration +---------- +- +-Where the memory region is backed by host memory (RAM, ROM and +-ROM device memory region types), this host memory needs to be +-copied to the destination on migration. 
These APIs which allocate +-the host memory for you will also register the memory so it is +-migrated: +- +-- memory_region_init_ram() +-- memory_region_init_rom() +-- memory_region_init_rom_device() +- +-For most devices and boards this is the correct thing. If you +-have a special case where you need to manage the migration of +-the backing memory yourself, you can call the functions: +- +-- memory_region_init_ram_nomigrate() +-- memory_region_init_rom_nomigrate() +-- memory_region_init_rom_device_nomigrate() +- +-which only initialize the MemoryRegion and leave handling +-migration to the caller. +- +-The functions: +- +-- memory_region_init_resizeable_ram() +-- memory_region_init_ram_from_file() +-- memory_region_init_ram_from_fd() +-- memory_region_init_ram_ptr() +-- memory_region_init_ram_device_ptr() +- +-are for special cases only, and so they do not automatically +-register the backing memory for migration; the caller must +-manage migration if necessary. +- +-Region names +------------- +- +-Regions are assigned names by the constructor. For most regions these are +-only used for debugging purposes, but RAM regions also use the name to identify +-live migration sections. This means that RAM region names need to have ABI +-stability. +- +-Region lifecycle +----------------- +- +-A region is created by one of the memory_region_init*() functions and +-attached to an object, which acts as its owner or parent. QEMU ensures +-that the owner object remains alive as long as the region is visible to +-the guest, or as long as the region is in use by a virtual CPU or another +-device. For example, the owner object will not die between an +-address_space_map operation and the corresponding address_space_unmap. +- +-After creation, a region can be added to an address space or a +-container with memory_region_add_subregion(), and removed using +-memory_region_del_subregion(). 
+- +-Various region attributes (read-only, dirty logging, coalesced mmio, +-ioeventfd) can be changed during the region lifecycle. They take effect +-as soon as the region is made visible. This can be immediately, later, +-or never. +- +-Destruction of a memory region happens automatically when the owner +-object dies. +- +-If however the memory region is part of a dynamically allocated data +-structure, you should call object_unparent() to destroy the memory region +-before the data structure is freed. For an example see VFIOMSIXInfo +-and VFIOQuirk in hw/vfio/pci.c. +- +-You must not destroy a memory region as long as it may be in use by a +-device or CPU. In order to do this, as a general rule do not create or +-destroy memory regions dynamically during a device's lifetime, and only +-call object_unparent() in the memory region owner's instance_finalize +-callback. The dynamically allocated data structure that contains the +-memory region then should obviously be freed in the instance_finalize +-callback as well. +- +-If you break this rule, the following situation can happen: +- +-- the memory region's owner had a reference taken via memory_region_ref +- (for example by address_space_map) +- +-- the region is unparented, and has no owner anymore +- +-- when address_space_unmap is called, the reference to the memory region's +- owner is leaked. +- +- +-There is an exception to the above rule: it is okay to call +-object_unparent at any time for an alias or a container region. It is +-therefore also okay to create or destroy alias and container regions +-dynamically during a device's lifetime. +- +-This exceptional usage is valid because aliases and containers only help +-QEMU building the guest's memory map; they are never accessed directly. +-memory_region_ref and memory_region_unref are never called on aliases +-or containers, and the above situation then cannot happen. 
Exploiting +-this exception is rarely necessary, and therefore it is discouraged, +-but nevertheless it is used in a few places. +- +-For regions that "have no owner" (NULL is passed at creation time), the +-machine object is actually used as the owner. Since instance_finalize is +-never called for the machine object, you must never call object_unparent +-on regions that have no owner, unless they are aliases or containers. +- +- +-Overlapping regions and priority +--------------------------------- +-Usually, regions may not overlap each other; a memory address decodes into +-exactly one target. In some cases it is useful to allow regions to overlap, +-and sometimes to control which of an overlapping regions is visible to the +-guest. This is done with memory_region_add_subregion_overlap(), which +-allows the region to overlap any other region in the same container, and +-specifies a priority that allows the core to decide which of two regions at +-the same address are visible (highest wins). +-Priority values are signed, and the default value is zero. This means that +-you can use memory_region_add_subregion_overlap() both to specify a region +-that must sit 'above' any others (with a positive priority) and also a +-background region that sits 'below' others (with a negative priority). +- +-If the higher priority region in an overlap is a container or alias, then +-the lower priority region will appear in any "holes" that the higher priority +-region has left by not mapping subregions to that area of its address range. +-(This applies recursively -- if the subregions are themselves containers or +-aliases that leave holes then the lower priority region will appear in these +-holes too.) +- +-For example, suppose we have a container A of size 0x8000 with two subregions +-B and C. B is a container mapped at 0x2000, size 0x4000, priority 2; C is +-an MMIO region mapped at 0x0, size 0x6000, priority 1. 
B currently has two +-of its own subregions: D of size 0x1000 at offset 0 and E of size 0x1000 at +-offset 0x2000. As a diagram:: +- +- 0 1000 2000 3000 4000 5000 6000 7000 8000 +- |------|------|------|------|------|------|------|------| +- A: [ ] +- C: [CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC] +- B: [ ] +- D: [DDDDD] +- E: [EEEEE] +- +-The regions that will be seen within this address range then are:: +- +- [CCCCCCCCCCCC][DDDDD][CCCCC][EEEEE][CCCCC] +- +-Since B has higher priority than C, its subregions appear in the flat map +-even where they overlap with C. In ranges where B has not mapped anything +-C's region appears. +- +-If B had provided its own MMIO operations (ie it was not a pure container) +-then these would be used for any addresses in its range not handled by +-D or E, and the result would be:: +- +- [CCCCCCCCCCCC][DDDDD][BBBBB][EEEEE][BBBBB] +- +-Priority values are local to a container, because the priorities of two +-regions are only compared when they are both children of the same container. +-This means that the device in charge of the container (typically modelling +-a bus or a memory controller) can use them to manage the interaction of +-its child regions without any side effects on other parts of the system. +-In the example above, the priorities of D and E are unimportant because +-they do not overlap each other. It is the relative priority of B and C +-that causes D and E to appear on top of C: D and E's priorities are never +-compared against the priority of C. 
+- +-Visibility +----------- +-The memory core uses the following rules to select a memory region when the +-guest accesses an address: +- +-- all direct subregions of the root region are matched against the address, in +- descending priority order +- +- - if the address lies outside the region offset/size, the subregion is +- discarded +- - if the subregion is a leaf (RAM or MMIO), the search terminates, returning +- this leaf region +- - if the subregion is a container, the same algorithm is used within the +- subregion (after the address is adjusted by the subregion offset) +- - if the subregion is an alias, the search is continued at the alias target +- (after the address is adjusted by the subregion offset and alias offset) +- - if a recursive search within a container or alias subregion does not +- find a match (because of a "hole" in the container's coverage of its +- address range), then if this is a container with its own MMIO or RAM +- backing the search terminates, returning the container itself. Otherwise +- we continue with the next subregion in priority order +- +-- if none of the subregions match the address then the search terminates +- with no match found +- +-Example memory map +------------------- +- +-:: +- +- system_memory: container@0-2^48-1 +- | +- +---- lomem: alias@0-0xdfffffff ---> #ram (0-0xdfffffff) +- | +- +---- himem: alias@0x100000000-0x11fffffff ---> #ram (0xe0000000-0xffffffff) +- | +- +---- vga-window: alias@0xa0000-0xbffff ---> #pci (0xa0000-0xbffff) +- | (prio 1) +- | +- +---- pci-hole: alias@0xe0000000-0xffffffff ---> #pci (0xe0000000-0xffffffff) +- +- pci (0-2^32-1) +- | +- +--- vga-area: container@0xa0000-0xbffff +- | | +- | +--- alias@0x00000-0x7fff ---> #vram (0x010000-0x017fff) +- | | +- | +--- alias@0x08000-0xffff ---> #vram (0x020000-0x027fff) +- | +- +---- vram: ram@0xe1000000-0xe1ffffff +- | +- +---- vga-mmio: mmio@0xe2000000-0xe200ffff +- +- ram: ram@0x00000000-0xffffffff +- +-This is a (simplified) PC memory map. 
The 4GB RAM block is mapped into the +-system address space via two aliases: "lomem" is a 1:1 mapping of the first +-3.5GB; "himem" maps the last 0.5GB at address 4GB. This leaves 0.5GB for the +-so-called PCI hole, that allows a 32-bit PCI bus to exist in a system with +-4GB of memory. +- +-The memory controller diverts addresses in the range 640K-768K to the PCI +-address space. This is modelled using the "vga-window" alias, mapped at a +-higher priority so it obscures the RAM at the same addresses. The vga window +-can be removed by programming the memory controller; this is modelled by +-removing the alias and exposing the RAM underneath. +- +-The pci address space is not a direct child of the system address space, since +-we only want parts of it to be visible (we accomplish this using aliases). +-It has two subregions: vga-area models the legacy vga window and is occupied +-by two 32K memory banks pointing at two sections of the framebuffer. +-In addition the vram is mapped as a BAR at address e1000000, and an additional +-BAR containing MMIO registers is mapped after it. +- +-Note that if the guest maps a BAR outside the PCI hole, it would not be +-visible as the pci-hole alias clips it to a 0.5GB range. +- +-MMIO Operations +---------------- +- +-MMIO regions are provided with ->read() and ->write() callbacks, +-which are sufficient for most devices. Some devices change behaviour +-based on the attributes used for the memory transaction, or need +-to be able to respond that the access should provoke a bus error +-rather than completing successfully; those devices can use the +-->read_with_attrs() and ->write_with_attrs() callbacks instead. 
+- +-In addition various constraints can be supplied to control how these +-callbacks are called: +- +-- .valid.min_access_size, .valid.max_access_size define the access sizes +- (in bytes) which the device accepts; accesses outside this range will +- have device and bus specific behaviour (ignored, or machine check) +-- .valid.unaligned specifies that the *device being modelled* supports +- unaligned accesses; if false, unaligned accesses will invoke the +- appropriate bus or CPU specific behaviour. +-- .impl.min_access_size, .impl.max_access_size define the access sizes +- (in bytes) supported by the *implementation*; other access sizes will be +- emulated using the ones available. For example a 4-byte write will be +- emulated using four 1-byte writes, if .impl.max_access_size = 1. +-- .impl.unaligned specifies that the *implementation* supports unaligned +- accesses; if false, unaligned accesses will be emulated by two aligned +- accesses. +diff --git a/docs/devel/memory.txt b/docs/devel/memory.txt +new file mode 100644 +index 0000000..42577e1 +--- /dev/null ++++ b/docs/devel/memory.txt +@@ -0,0 +1,351 @@ ++The memory API ++============== ++ ++The memory API models the memory and I/O buses and controllers of a QEMU ++machine. It attempts to allow modelling of: ++ ++ - ordinary RAM ++ - memory-mapped I/O (MMIO) ++ - memory controllers that can dynamically reroute physical memory regions ++ to different destinations ++ ++The memory model provides support for ++ ++ - tracking RAM changes by the guest ++ - setting up coalesced memory for kvm ++ - setting up ioeventfd regions for kvm ++ ++Memory is modelled as an acyclic graph of MemoryRegion objects. Sinks ++(leaves) are RAM and MMIO regions, while other nodes represent ++buses, memory controllers, and memory regions that have been rerouted. ++ ++In addition to MemoryRegion objects, the memory API provides AddressSpace ++objects for every root and possibly for intermediate MemoryRegions too. 
++These represent memory as seen from the CPU or a device's viewpoint. ++ ++Types of regions ++---------------- ++ ++There are multiple types of memory regions (all represented by a single C type ++MemoryRegion): ++ ++- RAM: a RAM region is simply a range of host memory that can be made available ++ to the guest. ++ You typically initialize these with memory_region_init_ram(). Some special ++ purposes require the variants memory_region_init_resizeable_ram(), ++ memory_region_init_ram_from_file(), or memory_region_init_ram_ptr(). ++ ++- MMIO: a range of guest memory that is implemented by host callbacks; ++ each read or write causes a callback to be called on the host. ++ You initialize these with memory_region_init_io(), passing it a ++ MemoryRegionOps structure describing the callbacks. ++ ++- ROM: a ROM memory region works like RAM for reads (directly accessing ++ a region of host memory), and forbids writes. You initialize these with ++ memory_region_init_rom(). ++ ++- ROM device: a ROM device memory region works like RAM for reads ++ (directly accessing a region of host memory), but like MMIO for ++ writes (invoking a callback). You initialize these with ++ memory_region_init_rom_device(). ++ ++- IOMMU region: an IOMMU region translates addresses of accesses made to it ++ and forwards them to some other target memory region. As the name suggests, ++ these are only needed for modelling an IOMMU, not for simple devices. ++ You initialize these with memory_region_init_iommu(). ++ ++- container: a container simply includes other memory regions, each at ++ a different offset. Containers are useful for grouping several regions ++ into one unit. For example, a PCI BAR may be composed of a RAM region ++ and an MMIO region. ++ ++ A container's subregions are usually non-overlapping. 
In some cases it is ++ useful to have overlapping regions; for example a memory controller that ++ can overlay a subregion of RAM with MMIO or ROM, or a PCI controller ++ that does not prevent cards from claiming overlapping BARs. ++ ++ You initialize a pure container with memory_region_init(). ++ ++- alias: a subsection of another region. Aliases allow a region to be ++ split apart into discontiguous regions. Examples of uses are memory banks ++ used when the guest address space is smaller than the amount of RAM ++ addressed, or a memory controller that splits main memory to expose a "PCI ++ hole". Aliases may point to any type of region, including other aliases, ++ but an alias may not point back to itself, directly or indirectly. ++ You initialize these with memory_region_init_alias(). ++ ++- reservation region: a reservation region is primarily for debugging. ++ It claims I/O space that is not supposed to be handled by QEMU itself. ++ The typical use is to track parts of the address space which will be ++ handled by the host kernel when KVM is enabled. You initialize these ++ by passing a NULL callback parameter to memory_region_init_io(). ++ ++It is valid to add subregions to a region which is not a pure container ++(that is, to an MMIO, RAM or ROM region). This means that the region ++will act like a container, except that any addresses within the container's ++region which are not claimed by any subregion are handled by the ++container itself (ie by its MMIO callbacks or RAM backing). However ++it is generally possible to achieve the same effect with a pure container ++one of whose subregions is a low priority "background" region covering ++the whole address range; this is often clearer and is preferred. ++Subregions cannot be added to an alias region. ++ ++Migration ++--------- ++ ++Where the memory region is backed by host memory (RAM, ROM and ++ROM device memory region types), this host memory needs to be ++copied to the destination on migration. 
These APIs which allocate ++the host memory for you will also register the memory so it is ++migrated: ++ - memory_region_init_ram() ++ - memory_region_init_rom() ++ - memory_region_init_rom_device() ++ ++For most devices and boards this is the correct thing. If you ++have a special case where you need to manage the migration of ++the backing memory yourself, you can call the functions: ++ - memory_region_init_ram_nomigrate() ++ - memory_region_init_rom_nomigrate() ++ - memory_region_init_rom_device_nomigrate() ++which only initialize the MemoryRegion and leave handling ++migration to the caller. ++ ++The functions: ++ - memory_region_init_resizeable_ram() ++ - memory_region_init_ram_from_file() ++ - memory_region_init_ram_from_fd() ++ - memory_region_init_ram_ptr() ++ - memory_region_init_ram_device_ptr() ++are for special cases only, and so they do not automatically ++register the backing memory for migration; the caller must ++manage migration if necessary. ++ ++Region names ++------------ ++ ++Regions are assigned names by the constructor. For most regions these are ++only used for debugging purposes, but RAM regions also use the name to identify ++live migration sections. This means that RAM region names need to have ABI ++stability. ++ ++Region lifecycle ++---------------- ++ ++A region is created by one of the memory_region_init*() functions and ++attached to an object, which acts as its owner or parent. QEMU ensures ++that the owner object remains alive as long as the region is visible to ++the guest, or as long as the region is in use by a virtual CPU or another ++device. For example, the owner object will not die between an ++address_space_map operation and the corresponding address_space_unmap. ++ ++After creation, a region can be added to an address space or a ++container with memory_region_add_subregion(), and removed using ++memory_region_del_subregion(). 
++ ++Various region attributes (read-only, dirty logging, coalesced mmio, ++ioeventfd) can be changed during the region lifecycle. They take effect ++as soon as the region is made visible. This can be immediately, later, ++or never. ++ ++Destruction of a memory region happens automatically when the owner ++object dies. ++ ++If however the memory region is part of a dynamically allocated data ++structure, you should call object_unparent() to destroy the memory region ++before the data structure is freed. For an example see VFIOMSIXInfo ++and VFIOQuirk in hw/vfio/pci.c. ++ ++You must not destroy a memory region as long as it may be in use by a ++device or CPU. In order to do this, as a general rule do not create or ++destroy memory regions dynamically during a device's lifetime, and only ++call object_unparent() in the memory region owner's instance_finalize ++callback. The dynamically allocated data structure that contains the ++memory region then should obviously be freed in the instance_finalize ++callback as well. ++ ++If you break this rule, the following situation can happen: ++ ++- the memory region's owner had a reference taken via memory_region_ref ++ (for example by address_space_map) ++ ++- the region is unparented, and has no owner anymore ++ ++- when address_space_unmap is called, the reference to the memory region's ++ owner is leaked. ++ ++ ++There is an exception to the above rule: it is okay to call ++object_unparent at any time for an alias or a container region. It is ++therefore also okay to create or destroy alias and container regions ++dynamically during a device's lifetime. ++ ++This exceptional usage is valid because aliases and containers only help ++QEMU building the guest's memory map; they are never accessed directly. ++memory_region_ref and memory_region_unref are never called on aliases ++or containers, and the above situation then cannot happen. 
Exploiting ++this exception is rarely necessary, and therefore it is discouraged, ++but nevertheless it is used in a few places. ++ ++For regions that "have no owner" (NULL is passed at creation time), the ++machine object is actually used as the owner. Since instance_finalize is ++never called for the machine object, you must never call object_unparent ++on regions that have no owner, unless they are aliases or containers. ++ ++ ++Overlapping regions and priority ++-------------------------------- ++Usually, regions may not overlap each other; a memory address decodes into ++exactly one target. In some cases it is useful to allow regions to overlap, ++and sometimes to control which of the overlapping regions is visible to the ++guest. This is done with memory_region_add_subregion_overlap(), which ++allows the region to overlap any other region in the same container, and ++specifies a priority that allows the core to decide which of two regions at ++the same address are visible (highest wins). ++Priority values are signed, and the default value is zero. This means that ++you can use memory_region_add_subregion_overlap() both to specify a region ++that must sit 'above' any others (with a positive priority) and also a ++background region that sits 'below' others (with a negative priority). ++ ++If the higher priority region in an overlap is a container or alias, then ++the lower priority region will appear in any "holes" that the higher priority ++region has left by not mapping subregions to that area of its address range. ++(This applies recursively -- if the subregions are themselves containers or ++aliases that leave holes then the lower priority region will appear in these ++holes too.) ++ ++For example, suppose we have a container A of size 0x8000 with two subregions ++B and C. B is a container mapped at 0x2000, size 0x4000, priority 2; C is ++an MMIO region mapped at 0x0, size 0x6000, priority 1. 
B currently has two ++of its own subregions: D of size 0x1000 at offset 0 and E of size 0x1000 at ++offset 0x2000. As a diagram: ++ ++ 0 1000 2000 3000 4000 5000 6000 7000 8000 ++ |------|------|------|------|------|------|------|------| ++ A: [ ] ++ C: [CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC] ++ B: [ ] ++ D: [DDDDD] ++ E: [EEEEE] ++ ++The regions that will be seen within this address range then are: ++ [CCCCCCCCCCCC][DDDDD][CCCCC][EEEEE][CCCCC] ++ ++Since B has higher priority than C, its subregions appear in the flat map ++even where they overlap with C. In ranges where B has not mapped anything ++C's region appears. ++ ++If B had provided its own MMIO operations (ie it was not a pure container) ++then these would be used for any addresses in its range not handled by ++D or E, and the result would be: ++ [CCCCCCCCCCCC][DDDDD][BBBBB][EEEEE][BBBBB] ++ ++Priority values are local to a container, because the priorities of two ++regions are only compared when they are both children of the same container. ++This means that the device in charge of the container (typically modelling ++a bus or a memory controller) can use them to manage the interaction of ++its child regions without any side effects on other parts of the system. ++In the example above, the priorities of D and E are unimportant because ++they do not overlap each other. It is the relative priority of B and C ++that causes D and E to appear on top of C: D and E's priorities are never ++compared against the priority of C. 
++ ++Visibility ++---------- ++The memory core uses the following rules to select a memory region when the ++guest accesses an address: ++ ++- all direct subregions of the root region are matched against the address, in ++ descending priority order ++ - if the address lies outside the region offset/size, the subregion is ++ discarded ++ - if the subregion is a leaf (RAM or MMIO), the search terminates, returning ++ this leaf region ++ - if the subregion is a container, the same algorithm is used within the ++ subregion (after the address is adjusted by the subregion offset) ++ - if the subregion is an alias, the search is continued at the alias target ++ (after the address is adjusted by the subregion offset and alias offset) ++ - if a recursive search within a container or alias subregion does not ++ find a match (because of a "hole" in the container's coverage of its ++ address range), then if this is a container with its own MMIO or RAM ++ backing the search terminates, returning the container itself. Otherwise ++ we continue with the next subregion in priority order ++- if none of the subregions match the address then the search terminates ++ with no match found ++ ++Example memory map ++------------------ ++ ++system_memory: container@0-2^48-1 ++ | ++ +---- lomem: alias@0-0xdfffffff ---> #ram (0-0xdfffffff) ++ | ++ +---- himem: alias@0x100000000-0x11fffffff ---> #ram (0xe0000000-0xffffffff) ++ | ++ +---- vga-window: alias@0xa0000-0xbffff ---> #pci (0xa0000-0xbffff) ++ | (prio 1) ++ | ++ +---- pci-hole: alias@0xe0000000-0xffffffff ---> #pci (0xe0000000-0xffffffff) ++ ++pci (0-2^32-1) ++ | ++ +--- vga-area: container@0xa0000-0xbffff ++ | | ++ | +--- alias@0x00000-0x7fff ---> #vram (0x010000-0x017fff) ++ | | ++ | +--- alias@0x08000-0xffff ---> #vram (0x020000-0x027fff) ++ | ++ +---- vram: ram@0xe1000000-0xe1ffffff ++ | ++ +---- vga-mmio: mmio@0xe2000000-0xe200ffff ++ ++ram: ram@0x00000000-0xffffffff ++ ++This is a (simplified) PC memory map. 
The 4GB RAM block is mapped into the ++system address space via two aliases: "lomem" is a 1:1 mapping of the first ++3.5GB; "himem" maps the last 0.5GB at address 4GB. This leaves 0.5GB for the ++so-called PCI hole, that allows a 32-bit PCI bus to exist in a system with ++4GB of memory. ++ ++The memory controller diverts addresses in the range 640K-768K to the PCI ++address space. This is modelled using the "vga-window" alias, mapped at a ++higher priority so it obscures the RAM at the same addresses. The vga window ++can be removed by programming the memory controller; this is modelled by ++removing the alias and exposing the RAM underneath. ++ ++The pci address space is not a direct child of the system address space, since ++we only want parts of it to be visible (we accomplish this using aliases). ++It has two subregions: vga-area models the legacy vga window and is occupied ++by two 32K memory banks pointing at two sections of the framebuffer. ++In addition the vram is mapped as a BAR at address e1000000, and an additional ++BAR containing MMIO registers is mapped after it. ++ ++Note that if the guest maps a BAR outside the PCI hole, it would not be ++visible as the pci-hole alias clips it to a 0.5GB range. ++ ++MMIO Operations ++--------------- ++ ++MMIO regions are provided with ->read() and ->write() callbacks, ++which are sufficient for most devices. Some devices change behaviour ++based on the attributes used for the memory transaction, or need ++to be able to respond that the access should provoke a bus error ++rather than completing successfully; those devices can use the ++->read_with_attrs() and ->write_with_attrs() callbacks instead. 
++ ++In addition various constraints can be supplied to control how these ++callbacks are called: ++ ++ - .valid.min_access_size, .valid.max_access_size define the access sizes ++ (in bytes) which the device accepts; accesses outside this range will ++ have device and bus specific behaviour (ignored, or machine check) ++ - .valid.unaligned specifies that the *device being modelled* supports ++ unaligned accesses; if false, unaligned accesses will invoke the ++ appropriate bus or CPU specific behaviour. ++ - .impl.min_access_size, .impl.max_access_size define the access sizes ++ (in bytes) supported by the *implementation*; other access sizes will be ++ emulated using the ones available. For example a 4-byte write will be ++ emulated using four 1-byte writes, if .impl.max_access_size = 1. ++ - .impl.unaligned specifies that the *implementation* supports unaligned ++ accesses; if false, unaligned accesses will be emulated by two aligned ++ accesses. +diff --git a/docs/index.rst b/docs/index.rst +deleted file mode 100644 +index 3690955..0000000 +--- a/docs/index.rst ++++ /dev/null +@@ -1,15 +0,0 @@ +-.. QEMU documentation master file, created by +- sphinx-quickstart on Thu Jan 31 16:40:14 2019. +- You can adapt this file completely to your liking, but it should at least +- contain the root `toctree` directive. +- +-Welcome to QEMU's documentation! +-================================ +- +-.. toctree:: +- :maxdepth: 2 +- :caption: Contents: +- +- interop/index +- devel/index +- +diff --git a/docs/interop/conf.py b/docs/interop/conf.py +deleted file mode 100644 +index cf3c69d..0000000 +--- a/docs/interop/conf.py ++++ /dev/null +@@ -1,15 +0,0 @@ +-# -*- coding: utf-8 -*- +-# +-# QEMU documentation build configuration file for the 'interop' manual. +-# +-# This includes the top level conf file and then makes any necessary tweaks. 
+-import sys +-import os +- +-qemu_docdir = os.path.abspath("..") +-parent_config = os.path.join(qemu_docdir, "conf.py") +-exec(compile(open(parent_config, "rb").read(), parent_config, 'exec')) +- +-# This slightly misuses the 'description', but is the best way to get +-# the manual title to appear in the sidebar. +-html_theme_options['description'] = u'System Emulation Management and Interoperability Guide' +diff --git a/docs/interop/index.rst b/docs/interop/index.rst +deleted file mode 100644 +index 2df977d..0000000 +--- a/docs/interop/index.rst ++++ /dev/null +@@ -1,18 +0,0 @@ +-.. This is the top level page for the 'interop' manual. +- +- +-QEMU System Emulation Management and Interoperability Guide +-=========================================================== +- +-This manual contains documents and specifications that are useful +-for making QEMU interoperate with other software. +- +-Contents: +- +-.. toctree:: +- :maxdepth: 2 +- +- bitmaps +- live-block-operations +- pr-helper +- +-- +1.8.3.1 + diff --git a/block-Fix-AioContext-switch-for-bs-drv-NULL.patch b/block-Fix-AioContext-switch-for-bs-drv-NULL.patch new file mode 100644 index 0000000..c1702d0 --- /dev/null +++ b/block-Fix-AioContext-switch-for-bs-drv-NULL.patch @@ -0,0 +1,107 @@ +From 1bffe1ae7a7b707c3a14ea2ccd00d3609d3ce4d8 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 17 Apr 2019 17:15:25 +0200 +Subject: [PATCH] block: Fix AioContext switch for bs->drv == NULL + +Even for block nodes with bs->drv == NULL, we can't just ignore a +bdrv_set_aio_context() call. Leaving the node in its old context can +mean that it's still in an iothread context in bdrv_close_all() during +shutdown, resulting in an attempted unlock of the AioContext lock which +we don't hold. 
+ +This is an example stack trace of a related crash: + + #0 0x00007ffff59da57f in raise () at /lib64/libc.so.6 + #1 0x00007ffff59c4895 in abort () at /lib64/libc.so.6 + #2 0x0000555555b97b1e in error_exit (err=, msg=msg@entry=0x555555d386d0 <__func__.19059> "qemu_mutex_unlock_impl") at util/qemu-thread-posix.c:36 + #3 0x0000555555b97f7f in qemu_mutex_unlock_impl (mutex=mutex@entry=0x5555568002f0, file=file@entry=0x555555d378df "util/async.c", line=line@entry=507) at util/qemu-thread-posix.c:97 + #4 0x0000555555b92f55 in aio_context_release (ctx=ctx@entry=0x555556800290) at util/async.c:507 + #5 0x0000555555b05cf8 in bdrv_prwv_co (child=child@entry=0x7fffc80012f0, offset=offset@entry=131072, qiov=qiov@entry=0x7fffffffd4f0, is_write=is_write@entry=true, flags=flags@entry=0) + at block/io.c:833 + #6 0x0000555555b060a9 in bdrv_pwritev (qiov=0x7fffffffd4f0, offset=131072, child=0x7fffc80012f0) at block/io.c:990 + #7 0x0000555555b060a9 in bdrv_pwrite (child=0x7fffc80012f0, offset=131072, buf=, bytes=) at block/io.c:990 + #8 0x0000555555ae172b in qcow2_cache_entry_flush (bs=bs@entry=0x555556810680, c=c@entry=0x5555568cc740, i=i@entry=0) at block/qcow2-cache.c:51 + #9 0x0000555555ae18dd in qcow2_cache_write (bs=bs@entry=0x555556810680, c=0x5555568cc740) at block/qcow2-cache.c:248 + #10 0x0000555555ae15de in qcow2_cache_flush (bs=0x555556810680, c=) at block/qcow2-cache.c:259 + #11 0x0000555555ae16b1 in qcow2_cache_flush_dependency (c=0x5555568a1700, c=0x5555568a1700, bs=0x555556810680) at block/qcow2-cache.c:194 + #12 0x0000555555ae16b1 in qcow2_cache_entry_flush (bs=bs@entry=0x555556810680, c=c@entry=0x5555568a1700, i=i@entry=0) at block/qcow2-cache.c:194 + #13 0x0000555555ae18dd in qcow2_cache_write (bs=bs@entry=0x555556810680, c=0x5555568a1700) at block/qcow2-cache.c:248 + #14 0x0000555555ae15de in qcow2_cache_flush (bs=bs@entry=0x555556810680, c=) at block/qcow2-cache.c:259 + #15 0x0000555555ad242c in qcow2_inactivate (bs=bs@entry=0x555556810680) at block/qcow2.c:2124 
+ #16 0x0000555555ad2590 in qcow2_close (bs=0x555556810680) at block/qcow2.c:2153 + #17 0x0000555555ab0c62 in bdrv_close (bs=0x555556810680) at block.c:3358 + #18 0x0000555555ab0c62 in bdrv_delete (bs=0x555556810680) at block.c:3542 + #19 0x0000555555ab0c62 in bdrv_unref (bs=0x555556810680) at block.c:4598 + #20 0x0000555555af4d72 in blk_remove_bs (blk=blk@entry=0x5555568103d0) at block/block-backend.c:785 + #21 0x0000555555af4dbb in blk_remove_all_bs () at block/block-backend.c:483 + #22 0x0000555555aae02f in bdrv_close_all () at block.c:3412 + #23 0x00005555557f9796 in main (argc=, argv=, envp=) at vl.c:4776 + +The reproducer I used is a qcow2 image on gluster volume, where the +virtual disk size (4 GB) is larger than the gluster volume size (64M), +so we can easily trigger an ENOSPC. This backend is assigned to a +virtio-blk device using an iothread, and then from the guest a +'dd if=/dev/zero of=/dev/vda bs=1G count=1' causes the VM to stop +because of an I/O error. qemu_gluster_co_flush_to_disk() sets +bs->drv = NULL on error, so when virtio-blk stops the dataplane, the +block nodes stay in the iothread AioContext. A 'quit' monitor command +issued from this paused state crashes the process. 
+ +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1631227 +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Max Reitz +Reviewed-by: Stefano Garzarella +(cherry-picked from commit 1bffe1ae7a7b707c3a14ea2ccd00d3609d3ce4d8) +--- + block.c | 12 ++---------- + 1 file changed, 2 insertions(+), 10 deletions(-) + +diff --git a/block.c b/block.c +index 16615bc876..9ae5c0ed2f 100644 +--- a/block.c ++++ b/block.c +@@ -5672,10 +5672,6 @@ void bdrv_detach_aio_context(BlockDriverState *bs) + BdrvAioNotifier *baf, *baf_tmp; + BdrvChild *child; + +- if (!bs->drv) { +- return; +- } +- + assert(!bs->walking_aio_notifiers); + bs->walking_aio_notifiers = true; + QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) { +@@ -5690,7 +5686,7 @@ void bdrv_detach_aio_context(BlockDriverState *bs) + */ + bs->walking_aio_notifiers = false; + +- if (bs->drv->bdrv_detach_aio_context) { ++ if (bs->drv && bs->drv->bdrv_detach_aio_context) { + bs->drv->bdrv_detach_aio_context(bs); + } + QLIST_FOREACH(child, &bs->children, next) { +@@ -5709,10 +5705,6 @@ void bdrv_attach_aio_context(BlockDriverState *bs, + BdrvAioNotifier *ban, *ban_tmp; + BdrvChild *child; + +- if (!bs->drv) { +- return; +- } +- + if (bs->quiesce_counter) { + aio_disable_external(new_context); + } +@@ -5722,7 +5714,7 @@ void bdrv_attach_aio_context(BlockDriverState *bs, + QLIST_FOREACH(child, &bs->children, next) { + bdrv_attach_aio_context(child->bs, new_context); + } +- if (bs->drv->bdrv_attach_aio_context) { ++ if (bs->drv && bs->drv->bdrv_attach_aio_context) { + bs->drv->bdrv_attach_aio_context(bs, new_context); + } + +-- +2.19.1 + diff --git a/block-file-posix-Unaligned-O_DIRECT-block-status.patch b/block-file-posix-Unaligned-O_DIRECT-block-status.patch new file mode 100644 index 0000000..40d84c8 --- /dev/null +++ b/block-file-posix-Unaligned-O_DIRECT-block-status.patch @@ -0,0 +1,87 @@ +From 9c3db310ff0b7473272ae8dce5e04e2f8a825390 Mon Sep 17 00:00:00 2001 +From: Max Reitz 
+Date: Wed, 15 May 2019 06:15:40 +0200 +Subject: [PATCH] block/file-posix: Unaligned O_DIRECT block-status + +Currently, qemu crashes whenever someone queries the block status of an +unaligned image tail of an O_DIRECT image: +$ echo > foo +$ qemu-img map --image-opts driver=file,filename=foo,cache.direct=on +Offset Length Mapped to File +qemu-img: block/io.c:2093: bdrv_co_block_status: Assertion `*pnum && +QEMU_IS_ALIGNED(*pnum, align) && align > offset - aligned_offset' +failed. + +This is because bdrv_co_block_status() checks that the result returned +by the driver's implementation is aligned to the request_alignment, but +file-posix can fail to do so, which is actually mentioned in a comment +there: "[...] possibly including a partial sector at EOF". + +Fix this by rounding up those partial sectors. + +There are two possible alternative fixes: +(1) We could refuse to open unaligned image files with O_DIRECT + altogether. That sounds reasonable until you realize that qcow2 + does necessarily not fill up its metadata clusters, and that nobody + runs qemu-img create with O_DIRECT. Therefore, unpreallocated qcow2 + files usually have an unaligned image tail. + +(2) bdrv_co_block_status() could ignore unaligned tails. It actually + throws away everything past the EOF already, so that sounds + reasonable. + Unfortunately, the block layer knows file lengths only with a + granularity of BDRV_SECTOR_SIZE, so bdrv_co_block_status() usually + would have to guess whether its file length information is inexact + or whether the driver is broken. + +Fixing what raw_co_block_status() returns is the safest thing to do. + +There seems to be no other block driver that sets request_alignment and +does not make sure that it always returns aligned values. 
+ +Cc: qemu-stable@nongnu.org +Signed-off-by: Max Reitz +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry-picked from commit 9c3db310ff0b7473272ae8dce5e04e2f8a825390) +--- + block/file-posix.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/block/file-posix.c b/block/file-posix.c +index e09e15bbf8..d018429672 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -2488,6 +2488,8 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, + off_t data = 0, hole = 0; + int ret; + ++ assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment)); ++ + ret = fd_open(bs); + if (ret < 0) { + return ret; +@@ -2513,6 +2515,20 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, + /* On a data extent, compute bytes to the end of the extent, + * possibly including a partial sector at EOF. */ + *pnum = MIN(bytes, hole - offset); ++ ++ /* ++ * We are not allowed to return partial sectors, though, so ++ * round up if necessary. ++ */ ++ if (!QEMU_IS_ALIGNED(*pnum, bs->bl.request_alignment)) { ++ int64_t file_length = raw_getlength(bs); ++ if (file_length > 0) { ++ /* Ignore errors, this is just a safeguard */ ++ assert(hole == file_length); ++ } ++ *pnum = ROUND_UP(*pnum, bs->bl.request_alignment); ++ } ++ + ret = BDRV_BLOCK_DATA; + } else { + /* On a hole, compute bytes to the beginning of the next extent. 
*/ +-- +2.19.1 + diff --git a/bridge.conf b/bridge.conf new file mode 100644 index 0000000..a573665 --- /dev/null +++ b/bridge.conf @@ -0,0 +1 @@ +allow virbr0 diff --git a/cpu-add-Cortex-A72-processor-kvm-target-support.patch b/cpu-add-Cortex-A72-processor-kvm-target-support.patch new file mode 100644 index 0000000..a310c37 --- /dev/null +++ b/cpu-add-Cortex-A72-processor-kvm-target-support.patch @@ -0,0 +1,58 @@ +From 4304d1de2c790ac75ed2f5984c4a3a2760c08fff Mon Sep 17 00:00:00 2001 +From: Xu Yandong +Date: Mon, 23 Sep 2019 14:35:25 +0800 +Subject: [PATCH] cpu: add Cortex-A72 processor kvm target support + +The ARM Cortex-A72 is ARMv8-A micro-architecture, +add kvm target to ARM Cortex-A72 processor definition. + +Signed-off-by: Xu Yandong +--- + target/arm/cpu64.c | 2 +- + target/arm/kvm-consts.h | 3 +++ + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index e408f50d..afbfd1ec 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -212,6 +212,7 @@ static void aarch64_a72_initfn(Object *obj) + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,cortex-a72"; ++ cpu->kvm_target = QEMU_KVM_ARM_TARGET_GENERIC_V8; + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_VFP4); + set_feature(&cpu->env, ARM_FEATURE_NEON); +@@ -276,7 +277,6 @@ static void aarch64_kunpeng_t82_initfn(Object *obj) + cpu->id_aa64dfr0 = 0x110305408; + cpu->isar.id_aa64isar0 = 0x10211120; + cpu->isar.id_aa64mmfr0 = 0x101125; +- cpu->kvm_target = KVM_ARM_TARGET_GENERIC_V8; + } + + static void cpu_max_get_sve_vq(Object *obj, Visitor *v, const char *name, +diff --git a/target/arm/kvm-consts.h b/target/arm/kvm-consts.h +index aad28258..b7dac596 100644 +--- a/target/arm/kvm-consts.h ++++ b/target/arm/kvm-consts.h +@@ -130,6 +130,8 @@ MISMATCH_CHECK(QEMU_PSCI_RET_DISABLED, PSCI_RET_DISABLED); + #define QEMU_KVM_ARM_TARGET_CORTEX_A57 2 + #define QEMU_KVM_ARM_TARGET_XGENE_POTENZA 3 + #define 
QEMU_KVM_ARM_TARGET_CORTEX_A53 4 ++/* Generic ARM v8 target */ ++#define QEMU_KVM_ARM_TARGET_GENERIC_V8 5 + + /* There's no kernel define for this: sentinel value which + * matches no KVM target value for either 64 or 32 bit +@@ -142,6 +144,7 @@ MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_FOUNDATION_V8, KVM_ARM_TARGET_FOUNDATION_V8); + MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A57, KVM_ARM_TARGET_CORTEX_A57); + MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_XGENE_POTENZA, KVM_ARM_TARGET_XGENE_POTENZA); + MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A53, KVM_ARM_TARGET_CORTEX_A53); ++MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_GENERIC_V8, KVM_ARM_TARGET_GENERIC_V8); + #else + MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A15, KVM_ARM_TARGET_CORTEX_A15); + MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A7, KVM_ARM_TARGET_CORTEX_A7); +-- +2.23.0 + diff --git a/cpu-add-Kunpeng-T82-cpu-support.patch b/cpu-add-Kunpeng-T82-cpu-support.patch new file mode 100644 index 0000000..87975a2 --- /dev/null +++ b/cpu-add-Kunpeng-T82-cpu-support.patch @@ -0,0 +1,65 @@ +From 70063948181062161a341a8738a53708d8ed0a0b Mon Sep 17 00:00:00 2001 +From: Xu Yandong +Date: Wed, 28 Aug 2019 01:36:21 -0400 +Subject: [PATCH] cpu: add Kunpeng-T82 cpu support + +Signed-off-by: Xu Yandong +--- + hw/arm/virt.c | 1 + + target/arm/cpu64.c | 21 +++++++++++++++++++++ + 2 files changed, 22 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index f89757df..11468b72 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -179,6 +179,7 @@ static const char *valid_cpus[] = { + ARM_CPU_TYPE_NAME("cortex-a53"), + ARM_CPU_TYPE_NAME("cortex-a57"), + ARM_CPU_TYPE_NAME("cortex-a72"), ++ ARM_CPU_TYPE_NAME("Kunpeng-T82"), + ARM_CPU_TYPE_NAME("host"), + ARM_CPU_TYPE_NAME("max"), + }; +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index 228906f2..5581d5e1 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -258,6 +258,26 @@ static void aarch64_a72_initfn(Object *obj) + define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo); + } + 
++static void aarch64_kunpeng_t82_initfn(Object *obj) ++{ ++ ARMCPU *cpu = ARM_CPU(obj); ++ ++ /* ++ * Hisilicon Kunpeng-T82 CPU is based on cortex-a72, ++ * so first initialize cpu data as cortex-a72 CPU, ++ * and then update the special registers. ++ */ ++ aarch64_a72_initfn(obj); ++ ++ cpu->midr = 0x480fd010; ++ cpu->ctr = 0x84448004; ++ cpu->isar.id_aa64pfr0 = 0x11001111; ++ cpu->id_aa64dfr0 = 0x110305408; ++ cpu->isar.id_aa64isar0 = 0x10211120; ++ cpu->isar.id_aa64mmfr0 = 0x101125; ++ cpu->kvm_target = KVM_ARM_TARGET_GENERIC_V8; ++} ++ + static void cpu_max_get_sve_vq(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) + { +@@ -388,6 +408,7 @@ static const ARMCPUInfo aarch64_cpus[] = { + { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, + { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, + { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, ++ { .name = "Kunpeng-T82", .initfn = aarch64_kunpeng_t82_initfn }, + { .name = "max", .initfn = aarch64_max_initfn }, + { .name = NULL } + }; +-- +2.19.1 + diff --git a/cpu-parse-feature-to-avoid-failure.patch b/cpu-parse-feature-to-avoid-failure.patch new file mode 100644 index 0000000..a241a5f --- /dev/null +++ b/cpu-parse-feature-to-avoid-failure.patch @@ -0,0 +1,72 @@ +From ba1ca232cfa2ca273c610beda40bee2143f11964 Mon Sep 17 00:00:00 2001 +From: rpm-build +Date: Tue, 3 Sep 2019 16:27:39 +0800 +Subject: [PATCH] cpu: parse +/- feature to avoid failure + +--- + target/arm/cpu64.c | 38 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 38 insertions(+) + +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index 0d492877..6ce87ce0 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -30,6 +30,7 @@ + #include "sysemu/kvm.h" + #include "kvm_arm.h" + #include "qapi/visitor.h" ++#include "hw/qdev-properties.h" + + static inline void set_feature(CPUARMState *env, int feature) + { +@@ -455,10 +456,47 @@ static gchar *aarch64_gdb_arch_name(CPUState *cs) + return 
g_strdup("aarch64"); + } + ++/* Parse "+feature,-feature,feature=foo" CPU feature string ++ */ ++static void arm_cpu_parse_featurestr(const char *typename, char *features, ++ Error **errp) ++{ ++ char *featurestr; ++ char *val; ++ static bool cpu_globals_initialized; ++ ++ if (cpu_globals_initialized) { ++ return; ++ } ++ cpu_globals_initialized = true; ++ ++ featurestr = features ? strtok(features, ",") : NULL; ++ while (featurestr) { ++ val = strchr(featurestr, '='); ++ if (val) { ++ GlobalProperty *prop = g_new0(typeof(*prop), 1); ++ *val = 0; ++ val++; ++ prop->driver = typename; ++ prop->property = g_strdup(featurestr); ++ prop->value = g_strdup(val); ++ qdev_prop_register_global(prop); ++ } else if (featurestr[0] == '+' || featurestr[0] == '-') { ++ warn_report("Ignore %s feature\n", featurestr); ++ } else { ++ error_setg(errp, "Expected key=value format, found %s.", ++ featurestr); ++ return; ++ } ++ featurestr = strtok(NULL, ","); ++ } ++} ++ + static void aarch64_cpu_class_init(ObjectClass *oc, void *data) + { + CPUClass *cc = CPU_CLASS(oc); + ++ cc->parse_features = arm_cpu_parse_featurestr; + cc->cpu_exec_interrupt = arm_cpu_exec_interrupt; + cc->gdb_read_register = aarch64_cpu_gdb_read_register; + cc->gdb_write_register = aarch64_cpu_gdb_write_register; +-- +2.19.1 + diff --git a/cutils-Fix-size_to_str-on-32-bit-platforms.patch b/cutils-Fix-size_to_str-on-32-bit-platforms.patch new file mode 100644 index 0000000..aaf02e3 --- /dev/null +++ b/cutils-Fix-size_to_str-on-32-bit-platforms.patch @@ -0,0 +1,37 @@ +From 754da86714d550c3f995f11a2587395081362e0a Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 17 Apr 2019 12:11:00 -0500 +Subject: [PATCH] cutils: Fix size_to_str() on 32-bit platforms + +When extracting a human-readable size formatter, we changed 'uint64_t +div' pre-patch to 'unsigned long div' post-patch. Which breaks on +32-bit platforms, resulting in 'inf' instead of intended values larger +than 999GB. 
+ +Fixes: 22951aaa +CC: qemu-stable@nongnu.org +Reported-by: Max Reitz +Signed-off-by: Eric Blake +Reviewed-by: Max Reitz +Signed-off-by: Kevin Wolf +(cherry-picked from commit 754da86714d550c3f995f11a2587395081362e0a) +--- + util/cutils.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/util/cutils.c b/util/cutils.c +index e098debdc0..d682c90901 100644 +--- a/util/cutils.c ++++ b/util/cutils.c +@@ -825,7 +825,7 @@ const char *qemu_ether_ntoa(const MACAddr *mac) + char *size_to_str(uint64_t val) + { + static const char *suffixes[] = { "", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei" }; +- unsigned long div; ++ uint64_t div; + int i; + + /* +-- +2.19.1 + diff --git a/ehci-fix-queue-dev-null-ptr-dereference.patch b/ehci-fix-queue-dev-null-ptr-dereference.patch new file mode 100644 index 0000000..18114e9 --- /dev/null +++ b/ehci-fix-queue-dev-null-ptr-dereference.patch @@ -0,0 +1,35 @@ +From 901ac0dee4b17890db815d143a8efeeac5d105f7 Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Wed, 21 Aug 2019 10:53:19 +0200 +Subject: [PATCH 1/5] ehci: fix queue->dev null ptr dereference + +In case we don't have a device for an active queue, just skip +processing the queue (same as we do for inactive queues) and log +a guest bug. 
+ +Reported-by: Guenter Roeck +Signed-off-by: Gerd Hoffmann +Tested-by: Guenter Roeck +Message-id: 20190821085319.13711-1-kraxel@redhat.com +(cherry-picked from commit 1be344b7ad25d572dadeee46d80f0103354352b2) +--- + hw/usb/hcd-ehci.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c +index 62dab05..5f089f3 100644 +--- a/hw/usb/hcd-ehci.c ++++ b/hw/usb/hcd-ehci.c +@@ -1834,6 +1834,9 @@ static int ehci_state_fetchqtd(EHCIQueue *q) + ehci_set_state(q->ehci, q->async, EST_EXECUTING); + break; + } ++ } else if (q->dev == NULL) { ++ ehci_trace_guest_bug(q->ehci, "no device attached to queue"); ++ ehci_set_state(q->ehci, q->async, EST_HORIZONTALQH); + } else { + p = ehci_alloc_packet(q); + p->qtdaddr = q->qtdaddr; +-- +1.8.3.1 + diff --git a/hw-Nuke-hw_compat_4_0_1-and-pc_compat_4_0_1.patch b/hw-Nuke-hw_compat_4_0_1-and-pc_compat_4_0_1.patch new file mode 100644 index 0000000..15d8077 --- /dev/null +++ b/hw-Nuke-hw_compat_4_0_1-and-pc_compat_4_0_1.patch @@ -0,0 +1,124 @@ +From 56deb8f0db72858985ba077cb0f678f2dbc13bba Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Mon, 5 Aug 2019 15:56:58 +0800 +Subject: [PATCH] hw: Nuke hw_compat_4_0_1 and pc_compat_4_0_1 + +Commit c87759ce876a fixed a regression affecting pc-q35 machines by +introducing a new pc-q35-4.0.1 machine version to be used instead +of pc-q35-4.0. The only purpose was to revert the default behaviour +of not using split irqchip, but the change also introduced the usual +hw_compat and pc_compat bits, and wired them for pc-q35 only. + +This raises questions when it comes to adding new compat properties for +4.0* machine versions of any architecture. Where to add them? In +4.0, 4.0.1 or both? Error prone. Another possibility would be to teach +all other architectures about 4.0.1. This solution isn't satisfying, +especially since this is a pc-q35 specific issue. 
+ +It turns out that the split irqchip default is handled in the machine +option function and doesn't involve compat lists at all. + +Drop all the 4.0.1 compat lists and use the 4.0 ones instead in the 4.0.1 +machine option function. + +Move the compat props that were added to the 4.0.1 since c87759ce876a to +4.0. + +Even if only hw_compat_4_0_1 had an impact on other architectures, +drop pc_compat_4_0_1 as well for consistency. + +Fixes: c87759ce876a "q35: Revert to kernel irqchip" +Suggested-by: Dr. David Alan Gilbert +Signed-off-by: Greg Kurz +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Michael S. Tsirkin +Message-Id: <156051774276.244890.8660277280145466396.stgit@bahia.lan> +Signed-off-by: Paolo Bonzini +(cherry-picked from commit 8e8cbed09ad9d577955691b4c061b61b602406d1) +--- + hw/core/machine.c | 3 --- + hw/i386/pc.c | 3 --- + hw/i386/pc_q35.c | 9 +++++++-- + include/hw/boards.h | 3 --- + include/hw/i386/pc.h | 3 --- + 5 files changed, 7 insertions(+), 14 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index e41e6698..5d046a43 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -24,9 +24,6 @@ + #include "hw/pci/pci.h" + #include "hw/mem/nvdimm.h" + +-GlobalProperty hw_compat_4_0_1[] = {}; +-const size_t hw_compat_4_0_1_len = G_N_ELEMENTS(hw_compat_4_0_1); +- + GlobalProperty hw_compat_4_0[] = {}; + const size_t hw_compat_4_0_len = G_N_ELEMENTS(hw_compat_4_0); + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index b5311e7e..d98b737b 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -115,9 +115,6 @@ struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX}; + /* Physical Address of PVH entry point read from kernel ELF NOTE */ + static size_t pvh_start_addr; + +-GlobalProperty pc_compat_4_0_1[] = {}; +-const size_t pc_compat_4_0_1_len = G_N_ELEMENTS(pc_compat_4_0_1); +- + GlobalProperty pc_compat_4_0[] = {}; + const size_t pc_compat_4_0_len = G_N_ELEMENTS(pc_compat_4_0); + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 
dcddc646..e3498a45 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -378,8 +378,13 @@ static void pc_q35_4_0_1_machine_options(MachineClass *m) + { + pc_q35_4_1_machine_options(m); + m->alias = NULL; +- compat_props_add(m->compat_props, hw_compat_4_0_1, hw_compat_4_0_1_len); +- compat_props_add(m->compat_props, pc_compat_4_0_1, pc_compat_4_0_1_len); ++ /* ++ * This is the default machine for the 4.0-stable branch. It is basically ++ * a 4.0 that doesn't use split irqchip by default. It MUST hence apply the ++ * 4.0 compat props. ++ */ ++ compat_props_add(m->compat_props, hw_compat_4_0, hw_compat_4_0_len); ++ compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len); + } + + DEFINE_Q35_MACHINE(v4_0_1, "pc-q35-4.0.1", NULL, +diff --git a/include/hw/boards.h b/include/hw/boards.h +index ad3950d3..fe1885cb 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -293,9 +293,6 @@ struct MachineState { + } \ + type_init(machine_initfn##_register_types) + +-extern GlobalProperty hw_compat_4_0_1[]; +-extern const size_t hw_compat_4_0_1_len; +- + extern GlobalProperty hw_compat_4_0[]; + extern const size_t hw_compat_4_0_len; + +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 5d563624..43df7230 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -293,9 +293,6 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t); + int e820_get_num_entries(void); + bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); + +-extern GlobalProperty pc_compat_4_0_1[]; +-extern const size_t pc_compat_4_0_1_len; +- + extern GlobalProperty pc_compat_4_0[]; + extern const size_t pc_compat_4_0_len; + +-- +2.19.1 + diff --git a/hw-add-compat-machines-for-4.1.patch b/hw-add-compat-machines-for-4.1.patch new file mode 100644 index 0000000..67d1ccd --- /dev/null +++ b/hw-add-compat-machines-for-4.1.patch @@ -0,0 +1,232 @@ +From 9bf2650bc3aab126b85c034bf86e27d564f9a1aa Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Thu, 11 Apr 2019 
12:20:25 +0200 +Subject: [PATCH] hw: add compat machines for 4.1 + +Add 4.1 machine types for arm/i440fx/q35/s390x/spapr. + +Signed-off-by: Cornelia Huck +Message-Id: <20190411102025.22559-1-cohuck@redhat.com> +Acked-by: Greg Kurz +Acked-by: David Gibson +Signed-off-by: Eduardo Habkost +(cherry-picked from commit 9bf2650bc3aab126b85c034bf86e27d564f9a1aa) +--- + hw/arm/virt.c | 9 ++++++++- + hw/core/machine.c | 3 +++ + hw/i386/pc.c | 3 +++ + hw/i386/pc_piix.c | 14 +++++++++++++- + hw/i386/pc_q35.c | 13 ++++++++++++- + hw/ppc/spapr.c | 15 +++++++++++++-- + hw/s390x/s390-virtio-ccw.c | 14 +++++++++++++- + include/hw/boards.h | 3 +++ + include/hw/i386/pc.h | 3 +++ + 9 files changed, 71 insertions(+), 6 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index ce2664a30b..16ba67f7a7 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1978,10 +1978,17 @@ static void machvirt_machine_init(void) + } + type_init(machvirt_machine_init); + ++static void virt_machine_4_1_options(MachineClass *mc) ++{ ++} ++DEFINE_VIRT_MACHINE_AS_LATEST(4, 1) ++ + static void virt_machine_4_0_options(MachineClass *mc) + { ++ virt_machine_4_1_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len); + } +-DEFINE_VIRT_MACHINE_AS_LATEST(4, 0) ++DEFINE_VIRT_MACHINE(4, 0) + + static void virt_machine_3_1_options(MachineClass *mc) + { +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 743fef2898..5d046a43e3 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -24,6 +24,9 @@ + #include "hw/pci/pci.h" + #include "hw/mem/nvdimm.h" + ++GlobalProperty hw_compat_4_0[] = {}; ++const size_t hw_compat_4_0_len = G_N_ELEMENTS(hw_compat_4_0); ++ + GlobalProperty hw_compat_3_1[] = { + { "pcie-root-port", "x-speed", "2_5" }, + { "pcie-root-port", "x-width", "1" }, +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index f2c15bf1f2..d98b737b8f 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -115,6 +115,9 @@ struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX}; + /* 
Physical Address of PVH entry point read from kernel ELF NOTE */ + static size_t pvh_start_addr; + ++GlobalProperty pc_compat_4_0[] = {}; ++const size_t pc_compat_4_0_len = G_N_ELEMENTS(pc_compat_4_0); ++ + GlobalProperty pc_compat_3_1[] = { + { "intel-iommu", "dma-drain", "off" }, + { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" }, +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 8ad8e885c6..2a7700b564 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -428,13 +428,25 @@ static void pc_i440fx_machine_options(MachineClass *m) + machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); + } + +-static void pc_i440fx_4_0_machine_options(MachineClass *m) ++static void pc_i440fx_4_1_machine_options(MachineClass *m) + { + pc_i440fx_machine_options(m); + m->alias = "pc"; + m->is_default = 1; + } + ++DEFINE_I440FX_MACHINE(v4_1, "pc-i440fx-4.1", NULL, ++ pc_i440fx_4_1_machine_options); ++ ++static void pc_i440fx_4_0_machine_options(MachineClass *m) ++{ ++ pc_i440fx_4_1_machine_options(m); ++ m->alias = NULL; ++ m->is_default = 0; ++ compat_props_add(m->compat_props, hw_compat_4_0, hw_compat_4_0_len); ++ compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len); ++} ++ + DEFINE_I440FX_MACHINE(v4_0, "pc-i440fx-4.0", NULL, + pc_i440fx_4_0_machine_options); + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 372c6b73be..37dd350511 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -365,12 +365,23 @@ static void pc_q35_machine_options(MachineClass *m) + m->max_cpus = 288; + } + +-static void pc_q35_4_0_machine_options(MachineClass *m) ++static void pc_q35_4_1_machine_options(MachineClass *m) + { + pc_q35_machine_options(m); + m->alias = "q35"; + } + ++DEFINE_Q35_MACHINE(v4_1, "pc-q35-4.1", NULL, ++ pc_q35_4_1_machine_options); ++ ++static void pc_q35_4_0_machine_options(MachineClass *m) ++{ ++ pc_q35_4_1_machine_options(m); ++ m->alias = NULL; ++ compat_props_add(m->compat_props, hw_compat_4_0, hw_compat_4_0_len); ++ 
compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len); ++} ++ + DEFINE_Q35_MACHINE(v4_0, "pc-q35-4.0", NULL, + pc_q35_4_0_machine_options); + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index b52b82d298..8438741ec2 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -4344,15 +4344,26 @@ static const TypeInfo spapr_machine_info = { + } \ + type_init(spapr_machine_register_##suffix) + ++/* ++ * pseries-4.1 ++ */ ++static void spapr_machine_4_1_class_options(MachineClass *mc) ++{ ++ /* Defaults for the latest behaviour inherited from the base class */ ++} ++ ++DEFINE_SPAPR_MACHINE(4_1, "4.1", true); ++ + /* + * pseries-4.0 + */ + static void spapr_machine_4_0_class_options(MachineClass *mc) + { +- /* Defaults for the latest behaviour inherited from the base class */ ++ spapr_machine_4_1_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len); + } + +-DEFINE_SPAPR_MACHINE(4_0, "4.0", true); ++DEFINE_SPAPR_MACHINE(4_0, "4.0", false); + + /* + * pseries-3.1 +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index d11069b860..02470e78c7 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -646,14 +646,26 @@ bool css_migration_enabled(void) + } \ + type_init(ccw_machine_register_##suffix) + ++static void ccw_machine_4_1_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_4_1_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(4_1, "4.1", true); ++ + static void ccw_machine_4_0_instance_options(MachineState *machine) + { ++ ccw_machine_4_1_instance_options(machine); + } + + static void ccw_machine_4_0_class_options(MachineClass *mc) + { ++ ccw_machine_4_1_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len); + } +-DEFINE_CCW_MACHINE(4_0, "4.0", true); ++DEFINE_CCW_MACHINE(4_0, "4.0", false); + + static void ccw_machine_3_1_instance_options(MachineState *machine) + { +diff --git a/include/hw/boards.h 
b/include/hw/boards.h +index e231860666..fe1885cbff 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -293,6 +293,9 @@ struct MachineState { + } \ + type_init(machine_initfn##_register_types) + ++extern GlobalProperty hw_compat_4_0[]; ++extern const size_t hw_compat_4_0_len; ++ + extern GlobalProperty hw_compat_3_1[]; + extern const size_t hw_compat_3_1_len; + +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index ca65ef18af..43df7230a2 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -293,6 +293,9 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t); + int e820_get_num_entries(void); + bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); + ++extern GlobalProperty pc_compat_4_0[]; ++extern const size_t pc_compat_4_0_len; ++ + extern GlobalProperty pc_compat_3_1[]; + extern const size_t pc_compat_3_1_len; + +-- +2.19.1 + diff --git a/hw-arm-expose-host-CPU-frequency-info-to-guest.patch b/hw-arm-expose-host-CPU-frequency-info-to-guest.patch new file mode 100644 index 0000000..0b04076 --- /dev/null +++ b/hw-arm-expose-host-CPU-frequency-info-to-guest.patch @@ -0,0 +1,606 @@ +From 773b25c55c7428b64d21b23a6b08fc629a665ca5 Mon Sep 17 00:00:00 2001 +From: zhanghailiang +Date: Mon, 29 Jul 2019 09:54:43 +0800 +Subject: [PATCH] hw/arm: expose host CPU frequency info to guest + +On ARM64, CPU frequency is fetched by ACPI CPPC, so we add virtual +CPPC registers and ACPI _CPC objects. + +The default frequency is set to the nominal frequency of Hi1616, which +will not support CPPC in future. On Hi1620 we are fetching the value +from Host CPPC sys file. + +All performance data are set to the same value for we don't support +guest initiating performance scaling. + +We don't emulate performance counters and simply return 1 for all +counter readings, and guest Linux should fall back to use the desired +performance value as the current performance. 
+ +Signed-off-by: Heyi Guo +Signed-off-by: zhanghailiang +--- + default-configs/aarch64-softmmu.mak | 1 + + hw/acpi/Makefile.objs | 1 + + hw/acpi/aml-build.c | 22 +++ + hw/acpi/cpufreq.c | 278 ++++++++++++++++++++++++++++ + hw/arm/virt-acpi-build.c | 78 +++++++- + hw/arm/virt.c | 13 ++ + hw/char/Kconfig | 4 + + include/hw/acpi/acpi-defs.h | 38 ++++ + include/hw/acpi/aml-build.h | 3 + + include/hw/arm/virt.h | 1 + + 10 files changed, 437 insertions(+), 2 deletions(-) + create mode 100644 hw/acpi/cpufreq.c + +diff --git a/default-configs/aarch64-softmmu.mak b/default-configs/aarch64-softmmu.mak +index 4ea9add0..37399c14 100644 +--- a/default-configs/aarch64-softmmu.mak ++++ b/default-configs/aarch64-softmmu.mak +@@ -10,3 +10,4 @@ CONFIG_XLNX_ZYNQMP=y + CONFIG_XLNX_ZYNQMP_ARM=y + CONFIG_XLNX_VERSAL=y + CONFIG_ARM_SMMUV3=y ++CONFIG_CPUFREQ=y +diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs +index 2d46e378..60979db9 100644 +--- a/hw/acpi/Makefile.objs ++++ b/hw/acpi/Makefile.objs +@@ -12,6 +12,7 @@ common-obj-y += acpi_interface.o + common-obj-y += bios-linker-loader.o + common-obj-y += aml-build.o + common-obj-$(CONFIG_TPM) += tpm.o ++common-obj-$(CONFIG_CPUFREQ) += cpufreq.o + + common-obj-$(CONFIG_IPMI) += ipmi.o + common-obj-$(call lnot,$(CONFIG_IPMI)) += ipmi-stub.o +diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c +index 555c24f2..73f97751 100644 +--- a/hw/acpi/aml-build.c ++++ b/hw/acpi/aml-build.c +@@ -1369,6 +1369,28 @@ Aml *aml_sleep(uint64_t msec) + return var; + } + ++/* ACPI 5.0b: 6.4.3.7 Generic Register Descriptor */ ++Aml *aml_generic_register(AmlRegionSpace rs, uint8_t reg_width, ++ uint8_t reg_offset, AmlAccessType type, uint64_t addr) ++{ ++ int i; ++ Aml *var = aml_alloc(); ++ build_append_byte(var->buf, 0x82); /* Generic Register Descriptor */ ++ build_append_byte(var->buf, 0x0C); /* Length, bits[7:0] value = 0x0C */ ++ build_append_byte(var->buf, 0); /* Length, bits[15:8] value = 0 */ ++ build_append_byte(var->buf, rs); /* Address 
Space ID */ ++ build_append_byte(var->buf, reg_width); /* Register Bit Width */ ++ build_append_byte(var->buf, reg_offset); /* Register Bit Offset */ ++ build_append_byte(var->buf, type); /* Access Size */ ++ ++ /* Register address */ ++ for (i = 0; i < 8; i++) { ++ build_append_byte(var->buf, extract64(addr, i * 8, 8)); ++ } ++ ++ return var; ++} ++ + static uint8_t Hex2Byte(const char *src) + { + int hi, lo; +diff --git a/hw/acpi/cpufreq.c b/hw/acpi/cpufreq.c +new file mode 100644 +index 00000000..c123a22b +--- /dev/null ++++ b/hw/acpi/cpufreq.c +@@ -0,0 +1,278 @@ ++/* ++ * ACPI CPPC register device ++ * ++ * Support for showing CPU frequency in guest OS. ++ * ++ * Copyright (c) 2019 HUAWEI TECHNOLOGIES CO.,LTD. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ ++ * You should have received a copy of the GNU General Public License along ++ * with this program; if not, see . 
++ */ ++ ++#include "qemu/osdep.h" ++#include "hw/sysbus.h" ++#include "chardev/char.h" ++#include "qemu/log.h" ++#include "trace.h" ++#include "qemu/option.h" ++#include "sysemu/sysemu.h" ++#include "hw/acpi/acpi-defs.h" ++#include "qemu/cutils.h" ++#include "qemu/error-report.h" ++ ++#define TYPE_CPUFREQ "cpufreq" ++#define CPUFREQ(obj) OBJECT_CHECK(CpuhzState, (obj), TYPE_CPUFREQ) ++#define NOMINAL_FREQ_FILE "/sys/devices/system/cpu/cpu0/acpi_cppc/nominal_freq" ++#define CPU_MAX_FREQ_FILE "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq" ++#define HZ_MAX_LENGTH 1024 ++#define MAX_SUPPORT_SPACE 0x10000 ++ ++/* ++ * Since Hi1616 will not support CPPC, we simply use its nominal frequency as ++ * the default. ++ */ ++#define DEFAULT_HZ 2400 ++ ++ ++int cppc_regs_offset[CPPC_REG_COUNT] = { ++ [HIGHEST_PERF] = 0, ++ [NOMINAL_PERF] = 4, ++ [LOW_NON_LINEAR_PERF] = 8, ++ [LOWEST_PERF] = 12, ++ [GUARANTEED_PERF] = 16, ++ [DESIRED_PERF] = 20, ++ [MIN_PERF] = -1, ++ [MAX_PERF] = -1, ++ [PERF_REDUC_TOLERANCE] = -1, ++ [TIME_WINDOW] = -1, ++ [CTR_WRAP_TIME] = -1, ++ [REFERENCE_CTR] = 24, ++ [DELIVERED_CTR] = 32, ++ [PERF_LIMITED] = 40, ++ [ENABLE] = -1, ++ [AUTO_SEL_ENABLE] = -1, ++ [AUTO_ACT_WINDOW] = -1, ++ [ENERGY_PERF] = -1, ++ [REFERENCE_PERF] = -1, ++ [LOWEST_FREQ] = 44, ++ [NOMINAL_FREQ] = 48, ++}; ++ ++typedef struct CpuhzState { ++ SysBusDevice parent_obj; ++ ++ MemoryRegion iomem; ++ uint32_t HighestPerformance; ++ uint32_t NominalPerformance; ++ uint32_t LowestNonlinearPerformance; ++ uint32_t LowestPerformance; ++ uint32_t GuaranteedPerformance; ++ uint32_t DesiredPerformance; ++ uint64_t ReferencePerformanceCounter; ++ uint64_t DeliveredPerformanceCounter; ++ uint32_t PerformanceLimited; ++ uint32_t LowestFreq; ++ uint32_t NominalFreq; ++ uint32_t reg_size; ++} CpuhzState; ++ ++ ++static uint64_t cpufreq_read(void *opaque, hwaddr offset, ++ unsigned size) ++{ ++ CpuhzState *s = (CpuhzState *)opaque; ++ uint64_t r; ++ uint64_t n; ++ ++ if (offset >= smp_cpus 
* CPPC_REG_PER_CPU_STRIDE) { ++ warn_report("cpufreq_read: offset 0x%lx out of range", offset); ++ return 0; ++ } ++ ++ n = offset % CPPC_REG_PER_CPU_STRIDE; ++ switch (n) { ++ case 0: ++ r = s->HighestPerformance; ++ break; ++ case 4: ++ r = s->NominalPerformance; ++ break; ++ case 8: ++ r = s->LowestNonlinearPerformance; ++ break; ++ case 12: ++ r = s->LowestPerformance; ++ break; ++ case 16: ++ r = s->GuaranteedPerformance; ++ break; ++ case 20: ++ r = s->DesiredPerformance; ++ break; ++ /* ++ * We don't have real counters and it is hard to emulate, so always set the ++ * counter value to 1 to rely on Linux to use the DesiredPerformance value ++ * directly. ++ */ ++ case 24: ++ r = s->ReferencePerformanceCounter; ++ break; ++ /* ++ * Guest may still access the register by 32bit; add the process to ++ * eliminate unnecessary warnings ++ */ ++ case 28: ++ r = s->ReferencePerformanceCounter >> 32; ++ break; ++ case 32: ++ r = s->DeliveredPerformanceCounter; ++ break; ++ case 36: ++ r = s->DeliveredPerformanceCounter >> 32; ++ break; ++ ++ case 40: ++ r = s->PerformanceLimited; ++ break; ++ case 44: ++ r = s->LowestFreq; ++ break; ++ case 48: ++ r = s->NominalFreq; ++ break; ++ default: ++ error_printf("cpufreq_read: Bad offset 0x%lx\n", offset); ++ r = 0; ++ break; ++ } ++ return r; ++} ++ ++ ++static void cpufreq_write(void *opaque, hwaddr offset, ++ uint64_t value, unsigned size) ++{ ++ uint64_t n; ++ ++ if (offset >= smp_cpus * CPPC_REG_PER_CPU_STRIDE) { ++ error_printf("cpufreq_write: offset 0x%lx out of range", offset); ++ return; ++ } ++ ++ n = offset % CPPC_REG_PER_CPU_STRIDE; ++ ++ switch (n) { ++ case 20: ++ break; ++ default: ++ error_printf("cpufreq_write: Bad offset 0x%lx\n", offset); ++ } ++} ++ ++static uint32_t CPPC_Read(const char *hostpath) ++{ ++ int fd; ++ char buffer[HZ_MAX_LENGTH] = { 0 }; ++ uint64_t hz; ++ int len; ++ const char *endptr = NULL; ++ int ret; ++ ++ fd = qemu_open(hostpath, O_RDONLY); ++ if (fd < 0) { ++ return 0; ++ } ++ ++ len 
= read(fd, buffer, HZ_MAX_LENGTH); ++ qemu_close(fd); ++ if (len <= 0) { ++ return 0; ++ } ++ ret = qemu_strtoul(buffer, &endptr, 0, &hz); ++ if (ret < 0) { ++ return 0; ++ } ++ return (uint32_t)hz; ++} ++ ++static const MemoryRegionOps cpufreq_ops = { ++ .read = cpufreq_read, ++ .write = cpufreq_write, ++ .endianness = DEVICE_NATIVE_ENDIAN, ++}; ++ ++static void hz_init(CpuhzState *s) ++{ ++ uint32_t hz; ++ ++ hz = CPPC_Read(NOMINAL_FREQ_FILE); ++ if (hz == 0) { ++ hz = CPPC_Read(CPU_MAX_FREQ_FILE); ++ if (hz == 0) { ++ hz = DEFAULT_HZ; ++ } else { ++ /* Value in CpuMaxFrequency is in KHz unit; convert to MHz */ ++ hz = hz / 1000; ++ } ++ } ++ ++ s->HighestPerformance = hz; ++ s->NominalPerformance = hz; ++ s->LowestNonlinearPerformance = hz; ++ s->LowestPerformance = hz; ++ s->GuaranteedPerformance = hz; ++ s->DesiredPerformance = hz; ++ s->ReferencePerformanceCounter = 1; ++ s->DeliveredPerformanceCounter = 1; ++ s->PerformanceLimited = 0; ++ s->LowestFreq = hz; ++ s->NominalFreq = hz; ++} ++ ++static void cpufreq_init(Object *obj) ++{ ++ SysBusDevice *sbd = SYS_BUS_DEVICE(obj); ++ CpuhzState *s = CPUFREQ(obj); ++ ++ s->reg_size = smp_cpus * CPPC_REG_PER_CPU_STRIDE; ++ if (s->reg_size > MAX_SUPPORT_SPACE) { ++ error_report("Required space 0x%x excesses the max support 0x%x", ++ s->reg_size, MAX_SUPPORT_SPACE); ++ goto err_end; ++ } ++ ++ memory_region_init_io(&s->iomem, OBJECT(s), &cpufreq_ops, s, "cpufreq", ++ s->reg_size); ++ sysbus_init_mmio(sbd, &s->iomem); ++ hz_init(s); ++ return; ++ ++err_end: ++ /* Set desired perf register offset to -1 to indicate no support for CPPC */ ++ cppc_regs_offset[DESIRED_PERF] = -1; ++} ++ ++static const TypeInfo cpufreq_arm_info = { ++ .name = TYPE_CPUFREQ, ++ .parent = TYPE_SYS_BUS_DEVICE, ++ .instance_size = sizeof(CpuhzState), ++ .instance_init = cpufreq_init, ++}; ++ ++static void cpufreq_register_types(void) ++{ ++ type_register_static(&cpufreq_arm_info); ++} ++ ++type_init(cpufreq_register_types) ++ +diff --git 
a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index bf9c0bc2..33a8e2e3 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -45,11 +45,73 @@ + #include "hw/arm/virt.h" + #include "sysemu/numa.h" + #include "kvm_arm.h" ++#include "hw/acpi/acpi-defs.h" + + #define ARM_SPI_BASE 32 + #define ACPI_POWER_BUTTON_DEVICE "PWRB" + +-static void acpi_dsdt_add_cpus(Aml *scope, int smp_cpus) ++static void acpi_dsdt_add_psd(Aml *dev, int cpus) ++{ ++ Aml *pkg; ++ Aml *sub; ++ ++ sub = aml_package(5); ++ aml_append(sub, aml_int(5)); ++ aml_append(sub, aml_int(0)); ++ /* Assume all vCPUs belong to the same domain */ ++ aml_append(sub, aml_int(0)); ++ /* SW_ANY: OSPM coordinate, initiate on any processor */ ++ aml_append(sub, aml_int(0xFD)); ++ aml_append(sub, aml_int(cpus)); ++ ++ pkg = aml_package(1); ++ aml_append(pkg, sub); ++ ++ aml_append(dev, aml_name_decl("_PSD", pkg)); ++} ++ ++static void acpi_dsdt_add_cppc(Aml *dev, uint64_t cpu_base, int *regs_offset) ++{ ++ Aml *cpc; ++ int i; ++ ++ /* Use version 3 of CPPC table from ACPI 6.3 */ ++ cpc = aml_package(23); ++ aml_append(cpc, aml_int(23)); ++ aml_append(cpc, aml_int(3)); ++ ++ for (i = 0; i < CPPC_REG_COUNT; i++) { ++ Aml *res; ++ uint8_t reg_width; ++ uint8_t acc_type; ++ uint64_t addr; ++ ++ if (regs_offset[i] == -1) { ++ reg_width = 0; ++ acc_type = AML_ANY_ACC; ++ addr = 0; ++ } else { ++ addr = cpu_base + regs_offset[i]; ++ if (i == REFERENCE_CTR || i == DELIVERED_CTR) { ++ reg_width = 64; ++ acc_type = AML_QWORD_ACC; ++ } else { ++ reg_width = 32; ++ acc_type = AML_DWORD_ACC; ++ } ++ } ++ ++ res = aml_resource_template(); ++ aml_append(res, aml_generic_register(AML_SYSTEM_MEMORY, reg_width, 0, ++ acc_type, addr)); ++ aml_append(cpc, res); ++ } ++ ++ aml_append(dev, aml_name_decl("_CPC", cpc)); ++} ++ ++static void acpi_dsdt_add_cpus(Aml *scope, int smp_cpus, ++ const MemMapEntry *cppc_memmap) + { + uint16_t i; + +@@ -57,6 +119,18 @@ static void acpi_dsdt_add_cpus(Aml *scope, int 
smp_cpus) + Aml *dev = aml_device("C%.03X", i); + aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0007"))); + aml_append(dev, aml_name_decl("_UID", aml_int(i))); ++ ++ /* ++ * Append _CPC and _PSD to support CPU frequency display ++ * Check CPPC available by DESIRED_PERF register ++ */ ++ if (cppc_regs_offset[DESIRED_PERF] != -1) { ++ acpi_dsdt_add_cppc(dev, ++ cppc_memmap->base + i * CPPC_REG_PER_CPU_STRIDE, ++ cppc_regs_offset); ++ acpi_dsdt_add_psd(dev, smp_cpus); ++ } ++ + aml_append(scope, dev); + } + } +@@ -740,7 +814,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + * the RTC ACPI device at all when using UEFI. + */ + scope = aml_scope("\\_SB"); +- acpi_dsdt_add_cpus(scope, vms->smp_cpus); ++ acpi_dsdt_add_cpus(scope, vms->smp_cpus, &memmap[VIRT_CPUFREQ]); + acpi_dsdt_add_uart(scope, &memmap[VIRT_UART], + (irqmap[VIRT_UART] + ARM_SPI_BASE)); + acpi_dsdt_add_flash(scope, &memmap[VIRT_FLASH]); +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index ce2664a3..ec6f00ab 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -132,6 +132,7 @@ static const MemMapEntry base_memmap[] = { + [VIRT_SECURE_UART] = { 0x09040000, 0x00001000 }, + [VIRT_SMMU] = { 0x09050000, 0x00020000 }, + [VIRT_MMIO] = { 0x0a000000, 0x00000200 }, ++ [VIRT_CPUFREQ] = { 0x0b000000, 0x00010000 }, + /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */ + [VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 }, + [VIRT_SECURE_MEM] = { 0x0e000000, 0x01000000 }, +@@ -725,6 +726,16 @@ static void create_uart(const VirtMachineState *vms, qemu_irq *pic, int uart, + g_free(nodename); + } + ++static void create_cpufreq(const VirtMachineState *vms, MemoryRegion *mem) ++{ ++ hwaddr base = vms->memmap[VIRT_CPUFREQ].base; ++ DeviceState *dev = qdev_create(NULL, "cpufreq"); ++ SysBusDevice *s = SYS_BUS_DEVICE(dev); ++ ++ qdev_init_nofail(dev); ++ memory_region_add_subregion(mem, base, sysbus_mmio_get_region(s, 0)); ++} ++ + static void create_rtc(const 
VirtMachineState *vms, qemu_irq *pic) + { + char *nodename; +@@ -1618,6 +1629,8 @@ static void machvirt_init(MachineState *machine) + + create_uart(vms, pic, VIRT_UART, sysmem, serial_hd(0)); + ++ create_cpufreq(vms, sysmem); ++ + if (vms->secure) { + create_secure_ram(vms, secure_sysmem); + create_uart(vms, pic, VIRT_SECURE_UART, secure_sysmem, serial_hd(1)); +diff --git a/hw/char/Kconfig b/hw/char/Kconfig +index 6360c9ff..8cc3ae2a 100644 +--- a/hw/char/Kconfig ++++ b/hw/char/Kconfig +@@ -40,3 +40,7 @@ config SCLPCONSOLE + + config TERMINAL3270 + bool ++ ++config CPUFREQ ++ bool ++ default y +diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h +index f9aa4bd3..b4899a32 100644 +--- a/include/hw/acpi/acpi-defs.h ++++ b/include/hw/acpi/acpi-defs.h +@@ -652,4 +652,42 @@ struct AcpiIortRC { + } QEMU_PACKED; + typedef struct AcpiIortRC AcpiIortRC; + ++/* ++ * CPPC register definition from kernel header ++ * include/acpi/cppc_acpi.h ++ * The last element is newly added for easy use ++ */ ++enum cppc_regs { ++ HIGHEST_PERF, ++ NOMINAL_PERF, ++ LOW_NON_LINEAR_PERF, ++ LOWEST_PERF, ++ GUARANTEED_PERF, ++ DESIRED_PERF, ++ MIN_PERF, ++ MAX_PERF, ++ PERF_REDUC_TOLERANCE, ++ TIME_WINDOW, ++ CTR_WRAP_TIME, ++ REFERENCE_CTR, ++ DELIVERED_CTR, ++ PERF_LIMITED, ++ ENABLE, ++ AUTO_SEL_ENABLE, ++ AUTO_ACT_WINDOW, ++ ENERGY_PERF, ++ REFERENCE_PERF, ++ LOWEST_FREQ, ++ NOMINAL_FREQ, ++ CPPC_REG_COUNT, ++}; ++ ++#define CPPC_REG_PER_CPU_STRIDE 0x40 ++ ++/* ++ * Offset for each CPPC register; -1 for unavailable ++ * The whole register space is unavailable if desired perf offset is -1. 
++ */ ++extern int cppc_regs_offset[CPPC_REG_COUNT]; ++ + #endif +diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h +index 1a563ad7..375335ab 100644 +--- a/include/hw/acpi/aml-build.h ++++ b/include/hw/acpi/aml-build.h +@@ -347,6 +347,9 @@ Aml *aml_qword_memory(AmlDecode dec, AmlMinFixed min_fixed, + Aml *aml_dma(AmlDmaType typ, AmlDmaBusMaster bm, AmlTransferSize sz, + uint8_t channel); + Aml *aml_sleep(uint64_t msec); ++Aml *aml_generic_register(AmlRegionSpace rs, uint8_t reg_width, ++ uint8_t reg_offset, AmlAccessType type, ++ uint64_t addr); + + /* Block AML object primitives */ + Aml *aml_scope(const char *name_format, ...) GCC_FMT_ATTR(1, 2); +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 507517c6..8465f9bd 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -66,6 +66,7 @@ enum { + VIRT_GIC_REDIST, + VIRT_SMMU, + VIRT_UART, ++ VIRT_CPUFREQ, + VIRT_MMIO, + VIRT_RTC, + VIRT_FW_CFG, +-- +2.19.1 + diff --git a/hw-arm-virt-Introduce-cpu-topology-support.patch b/hw-arm-virt-Introduce-cpu-topology-support.patch new file mode 100644 index 0000000..027a511 --- /dev/null +++ b/hw-arm-virt-Introduce-cpu-topology-support.patch @@ -0,0 +1,224 @@ +From 123b4eb3cb7b9b4e3e0705a9b5f974b37d3b8431 Mon Sep 17 00:00:00 2001 +From: zhanghailiang +Date: Mon, 5 Aug 2019 15:04:31 +0800 +Subject: [PATCH] hw/arm/virt: Introduce cpu topology support + +Add topology support for guest vcpu by cpu-map in dtb when the guest is booted +with dtb, and by pptt table when the guest is booted with acpi. 
+ +Signed-off-by: Honghao +Signed-off-by: zhanghailiang +(picked-from https://patchwork.ozlabs.org/cover/939301/ which is pushed by +Andrew Jones ) +--- + device_tree.c | 32 +++++++++++++++++++++++ + hw/acpi/aml-build.c | 50 ++++++++++++++++++++++++++++++++++++ + hw/arm/virt-acpi-build.c | 4 +++ + hw/arm/virt.c | 29 +++++++++++++++++++++ + include/hw/acpi/aml-build.h | 2 ++ + include/sysemu/device_tree.h | 1 + + 6 files changed, 118 insertions(+) + +diff --git a/device_tree.c b/device_tree.c +index f8b46b3c..03906a14 100644 +--- a/device_tree.c ++++ b/device_tree.c +@@ -524,6 +524,38 @@ int qemu_fdt_add_subnode(void *fdt, const char *name) + return retval; + } + ++/** ++ * qemu_fdt_add_path ++ * @fdt: Flattened Device Tree ++ * @path: Flattened Device Tree node path ++ * ++ * qemu_fdt_add_path works like qemu_fdt_add_subnode, except it ++ * also recursively adds any missing parent nodes. ++ */ ++int qemu_fdt_add_path(void *fdt, const char *path) ++{ ++ char *parent; ++ int offset; ++ ++ offset = fdt_path_offset(fdt, path); ++ if (offset < 0 && offset != -FDT_ERR_NOTFOUND) { ++ error_report("%s Couldn't find node %s: %s", __func__, path, ++ fdt_strerror(offset)); ++ exit(1); ++ } ++ ++ if (offset != -FDT_ERR_NOTFOUND) { ++ return offset; ++ } ++ ++ parent = g_strdup(path); ++ strrchr(parent, '/')[0] = '\0'; ++ qemu_fdt_add_path(fdt, parent); ++ g_free(parent); ++ ++ return qemu_fdt_add_subnode(fdt, path); ++} ++ + void qemu_fdt_dumpdtb(void *fdt, int size) + { + const char *dumpdtb = qemu_opt_get(qemu_get_machine_opts(), "dumpdtb"); +diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c +index 73f97751..9d39ad10 100644 +--- a/hw/acpi/aml-build.c ++++ b/hw/acpi/aml-build.c +@@ -25,6 +25,7 @@ + #include "qemu/bswap.h" + #include "qemu/bitops.h" + #include "sysemu/numa.h" ++#include "sysemu/cpus.h" + + static GArray *build_alloc_array(void) + { +@@ -51,6 +52,55 @@ static void build_append_array(GArray *array, GArray *val) + g_array_append_vals(array, val->data, 
val->len); + } + ++/* ++ * ACPI 6.2 Processor Properties Topology Table (PPTT) ++ */ ++static void build_cpu_hierarchy(GArray *tbl, uint32_t flags, ++ uint32_t parent, uint32_t id) ++{ ++ build_append_byte(tbl, 0); /* Type 0 - processor */ ++ build_append_byte(tbl, 20); /* Length, no private resources */ ++ build_append_int_noprefix(tbl, 0, 2); /* Reserved */ ++ build_append_int_noprefix(tbl, flags, 4); ++ build_append_int_noprefix(tbl, parent, 4); ++ build_append_int_noprefix(tbl, id, 4); ++ build_append_int_noprefix(tbl, 0, 4); /* Num private resources */ ++} ++ ++void build_pptt(GArray *table_data, BIOSLinker *linker, int possible_cpus) ++{ ++ int pptt_start = table_data->len; ++ int uid = 0, cpus = 0, socket; ++ ++ acpi_data_push(table_data, sizeof(AcpiTableHeader)); ++ ++ for (socket = 0; cpus < possible_cpus; socket++) { ++ uint32_t socket_offset = table_data->len - pptt_start; ++ int core; ++ ++ build_cpu_hierarchy(table_data, 1, 0, socket); ++ ++ for (core = 0; core < smp_cores; core++) { ++ uint32_t core_offset = table_data->len - pptt_start; ++ int thread; ++ ++ if (smp_threads > 1) { ++ build_cpu_hierarchy(table_data, 0, socket_offset, core); ++ for (thread = 0; thread < smp_threads; thread++) { ++ build_cpu_hierarchy(table_data, 2, core_offset, uid++); ++ } ++ } else { ++ build_cpu_hierarchy(table_data, 2, socket_offset, uid++); ++ } ++ } ++ cpus += smp_cores * smp_threads; ++ } ++ ++ build_header(linker, table_data, ++ (void *)(table_data->data + pptt_start), "PPTT", ++ table_data->len - pptt_start, 1, NULL, NULL); ++} ++ + #define ACPI_NAMESEG_LEN 4 + + static void +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 33a8e2e3..18653e6d 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -870,6 +870,10 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) + acpi_add_table(table_offsets, tables_blob); + build_fadt_rev5(tables_blob, tables->linker, vms, dsdt); + ++ acpi_add_table(table_offsets, 
tables_blob); ++ ++ build_pptt(tables_blob, tables->linker, vms->smp_cpus); ++ + acpi_add_table(table_offsets, tables_blob); + build_madt(tables_blob, tables->linker, vms); + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 57a78b16..16700a2e 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -42,6 +42,7 @@ + #include "net/net.h" + #include "sysemu/device_tree.h" + #include "sysemu/numa.h" ++#include "sysemu/cpus.h" + #include "sysemu/sysemu.h" + #include "sysemu/kvm.h" + #include "hw/loader.h" +@@ -364,8 +365,36 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) + ms->possible_cpus->cpus[cs->cpu_index].props.node_id); + } + ++ qemu_fdt_setprop_cell(vms->fdt, nodename, "phandle", ++ qemu_fdt_alloc_phandle(vms->fdt)); ++ + g_free(nodename); + } ++ ++ /* Add vcpu topology by fdt node cpu-map. */ ++ qemu_fdt_add_subnode(vms->fdt, "/cpus/cpu-map"); ++ ++ for (cpu = vms->smp_cpus - 1; cpu >= 0; cpu--) { ++ char *cpu_path = g_strdup_printf("/cpus/cpu@%d", cpu); ++ char *map_path; ++ ++ if (smp_threads > 1) { ++ map_path = g_strdup_printf( ++ "/cpus/cpu-map/%s%d/%s%d/%s%d", ++ "cluster", cpu / (smp_cores * smp_threads), ++ "core", (cpu / smp_threads) % smp_cores, ++ "thread", cpu % smp_threads); ++ } else { ++ map_path = g_strdup_printf( ++ "/cpus/cpu-map/%s%d/%s%d", ++ "cluster", cpu / smp_cores, ++ "core", cpu % smp_cores); ++ } ++ qemu_fdt_add_path(vms->fdt, map_path); ++ qemu_fdt_setprop_phandle(vms->fdt, map_path, "cpu", cpu_path); ++ g_free(map_path); ++ g_free(cpu_path); ++ } + } + + static void fdt_add_its_gic_node(VirtMachineState *vms) +diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h +index 375335ab..bfb0b100 100644 +--- a/include/hw/acpi/aml-build.h ++++ b/include/hw/acpi/aml-build.h +@@ -417,6 +417,8 @@ build_append_gas_from_struct(GArray *table, const struct AcpiGenericAddress *s) + void build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base, + uint64_t len, int node, MemoryAffinityFlags flags); + ++void 
build_pptt(GArray *table_data, BIOSLinker *linker, int possible_cpus); ++ + void build_slit(GArray *table_data, BIOSLinker *linker); + + void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, +diff --git a/include/sysemu/device_tree.h b/include/sysemu/device_tree.h +index c16fd69b..d62fc873 100644 +--- a/include/sysemu/device_tree.h ++++ b/include/sysemu/device_tree.h +@@ -101,6 +101,7 @@ uint32_t qemu_fdt_get_phandle(void *fdt, const char *path); + uint32_t qemu_fdt_alloc_phandle(void *fdt); + int qemu_fdt_nop_node(void *fdt, const char *node_path); + int qemu_fdt_add_subnode(void *fdt, const char *name); ++int qemu_fdt_add_path(void *fdt, const char *path); + + #define qemu_fdt_setprop_cells(fdt, node_path, property, ...) \ + do { \ +-- +2.19.1 + diff --git a/hw-arm64-add-vcpu-cache-info-support.patch b/hw-arm64-add-vcpu-cache-info-support.patch new file mode 100644 index 0000000..c9e8437 --- /dev/null +++ b/hw-arm64-add-vcpu-cache-info-support.patch @@ -0,0 +1,318 @@ +From 8db6d888e3eb131900111506b93f6101413df5b4 Mon Sep 17 00:00:00 2001 +From: zhanghailiang +Date: Mon, 5 Aug 2019 15:30:05 +0800 +Subject: [PATCH] hw/arm64: add vcpu cache info support + +Support VCPU Cache info by dtb and PPTT table, including L1, L2 and L3 Cache. 
+ +Signed-off-by: zhanghailiang +Signed-off-by: Honghao +--- + hw/acpi/aml-build.c | 124 ++++++++++++++++++++++++++++++++++++ + hw/arm/virt.c | 76 +++++++++++++++++++++- + include/hw/acpi/aml-build.h | 46 +++++++++++++ + 3 files changed, 245 insertions(+), 1 deletion(-) + +diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c +index 9d39ad10..99209c0a 100644 +--- a/hw/acpi/aml-build.c ++++ b/hw/acpi/aml-build.c +@@ -55,6 +55,129 @@ static void build_append_array(GArray *array, GArray *val) + /* + * ACPI 6.2 Processor Properties Topology Table (PPTT) + */ ++#ifdef __aarch64__ ++static void build_cache_head(GArray *tbl, uint32_t next_level) ++{ ++ build_append_byte(tbl, 1); ++ build_append_byte(tbl, 24); ++ build_append_int_noprefix(tbl, 0, 2); ++ build_append_int_noprefix(tbl, 127, 4); ++ build_append_int_noprefix(tbl, next_level, 4); ++} ++ ++static void build_cache_tail(GArray *tbl, uint32_t cache_type) ++{ ++ switch (cache_type) { ++ case ARM_L1D_CACHE: /* L1 dcache info*/ ++ build_append_int_noprefix(tbl, ARM_L1DCACHE_SIZE, 4); ++ build_append_int_noprefix(tbl, ARM_L1DCACHE_SET, 4); ++ build_append_byte(tbl, ARM_L1DCACHE_ASSOCIATIVITY); ++ build_append_byte(tbl, ARM_L1DCACHE_ATTRIBUTES); ++ build_append_int_noprefix(tbl, ARM_L1DCACHE_LINE_SIZE, 2); ++ break; ++ case ARM_L1I_CACHE: /* L1 icache info*/ ++ build_append_int_noprefix(tbl, ARM_L1ICACHE_SIZE, 4); ++ build_append_int_noprefix(tbl, ARM_L1ICACHE_SET, 4); ++ build_append_byte(tbl, ARM_L1ICACHE_ASSOCIATIVITY); ++ build_append_byte(tbl, ARM_L1ICACHE_ATTRIBUTES); ++ build_append_int_noprefix(tbl, ARM_L1ICACHE_LINE_SIZE, 2); ++ break; ++ case ARM_L2_CACHE: /* L2 cache info*/ ++ build_append_int_noprefix(tbl, ARM_L2CACHE_SIZE, 4); ++ build_append_int_noprefix(tbl, ARM_L2CACHE_SET, 4); ++ build_append_byte(tbl, ARM_L2CACHE_ASSOCIATIVITY); ++ build_append_byte(tbl, ARM_L2CACHE_ATTRIBUTES); ++ build_append_int_noprefix(tbl, ARM_L2CACHE_LINE_SIZE, 2); ++ break; ++ case ARM_L3_CACHE: /* L3 cache info*/ ++ 
build_append_int_noprefix(tbl, ARM_L3CACHE_SIZE, 4); ++ build_append_int_noprefix(tbl, ARM_L3CACHE_SET, 4); ++ build_append_byte(tbl, ARM_L3CACHE_ASSOCIATIVITY); ++ build_append_byte(tbl, ARM_L3CACHE_ATTRIBUTES); ++ build_append_int_noprefix(tbl, ARM_L3CACHE_LINE_SIZE, 2); ++ break; ++ default: ++ build_append_int_noprefix(tbl, 0, 4); ++ build_append_int_noprefix(tbl, 0, 4); ++ build_append_byte(tbl, 0); ++ build_append_byte(tbl, 0); ++ build_append_int_noprefix(tbl, 0, 2); ++ break; ++ } ++} ++ ++static void build_cache_hierarchy(GArray *tbl, ++ uint32_t next_level, uint32_t cache_type) ++{ ++ build_cache_head(tbl, next_level); ++ build_cache_tail(tbl, cache_type); ++} ++ ++static void build_arm_socket_hierarchy(GArray *tbl, ++ uint32_t offset, uint32_t id) ++{ ++ build_append_byte(tbl, 0); /* Type 0 - processor */ ++ build_append_byte(tbl, 24); /* Length, add private resources */ ++ build_append_int_noprefix(tbl, 0, 2); /* Reserved */ ++ build_append_int_noprefix(tbl, 1, 4); /* Processor boundary and id invalid*/ ++ build_append_int_noprefix(tbl, 0, 4); ++ build_append_int_noprefix(tbl, id, 4); ++ build_append_int_noprefix(tbl, 1, 4); /* Num private resources */ ++ build_append_int_noprefix(tbl, offset, 4); ++} ++ ++static void build_arm_cpu_hierarchy(GArray *tbl, ++ struct offset_status *offset, uint32_t id) ++{ ++ if (!offset) { ++ return; ++ } ++ build_append_byte(tbl, 0); /* Type 0 - processor */ ++ build_append_byte(tbl, 32); /* Length, add private resources */ ++ build_append_int_noprefix(tbl, 0, 2); /* Reserved */ ++ build_append_int_noprefix(tbl, 2, 4); /* Valid id*/ ++ build_append_int_noprefix(tbl, offset->parent, 4); ++ build_append_int_noprefix(tbl, id, 4); ++ build_append_int_noprefix(tbl, 3, 4); /* Num private resources */ ++ build_append_int_noprefix(tbl, offset->l1d_offset, 4); ++ build_append_int_noprefix(tbl, offset->l1i_offset, 4); ++ build_append_int_noprefix(tbl, offset->l2_offset, 4); ++} ++ ++void build_pptt(GArray *table_data, BIOSLinker 
*linker, int possible_cpus) ++{ ++ int pptt_start = table_data->len; ++ int uid = 0, cpus = 0, socket; ++ struct offset_status offset; ++ ++ acpi_data_push(table_data, sizeof(AcpiTableHeader)); ++ ++ for (socket = 0; cpus < possible_cpus; socket++) { ++ int core; ++ uint32_t l3_offset = table_data->len - pptt_start; ++ build_cache_hierarchy(table_data, 0, ARM_L3_CACHE); ++ ++ offset.parent = table_data->len - pptt_start; ++ build_arm_socket_hierarchy(table_data, l3_offset, socket); ++ ++ for (core = 0; core < smp_cores; core++) { ++ offset.l2_offset = table_data->len - pptt_start; ++ build_cache_hierarchy(table_data, 0, ARM_L2_CACHE); ++ offset.l1d_offset = table_data->len - pptt_start; ++ build_cache_hierarchy(table_data, offset.l2_offset, ARM_L1D_CACHE); ++ offset.l1i_offset = table_data->len - pptt_start; ++ build_cache_hierarchy(table_data, offset.l2_offset, ARM_L1I_CACHE); ++ build_arm_cpu_hierarchy(table_data, &offset, uid++); ++ cpus++; ++ } ++ } ++ ++ build_header(linker, table_data, ++ (void *)(table_data->data + pptt_start), "PPTT", ++ table_data->len - pptt_start, 1, NULL, NULL); ++} ++ ++#else + static void build_cpu_hierarchy(GArray *tbl, uint32_t flags, + uint32_t parent, uint32_t id) + { +@@ -100,6 +223,7 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, int possible_cpus) + (void *)(table_data->data + pptt_start), "PPTT", + table_data->len - pptt_start, 1, NULL, NULL); + } ++#endif + + #define ACPI_NAMESEG_LEN 4 + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 16700a2e..96f56e2e 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -304,6 +304,77 @@ static void fdt_add_timer_nodes(const VirtMachineState *vms) + GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_NS_EL2_IRQ, irqflags); + } + ++static void fdt_add_l3cache_nodes(const VirtMachineState *vms) ++{ ++ int i; ++ unsigned int sockets = vms->smp_cpus / smp_cores; ++ ++ /* If current is not equal to max */ ++ if (vms->smp_cpus % smp_cores) ++ sockets++; ++ ++ for (i = 0; i < sockets; i++) { ++ char 
*nodename = g_strdup_printf("/cpus/l3-cache%d", i); ++ qemu_fdt_add_subnode(vms->fdt, nodename); ++ qemu_fdt_setprop_string(vms->fdt, nodename, "compatible", "cache"); ++ qemu_fdt_setprop_string(vms->fdt, nodename, "cache-unified", "true"); ++ qemu_fdt_setprop_cell(vms->fdt, nodename, "cache-level", 3); ++ qemu_fdt_setprop_cell(vms->fdt, nodename, "cache-size", 0x2000000); ++ qemu_fdt_setprop_cell(vms->fdt, nodename, "cache-line-size", 128); ++ qemu_fdt_setprop_cell(vms->fdt, nodename, "cache-sets", 2048); ++ qemu_fdt_setprop_cell(vms->fdt, nodename, "phandle", ++ qemu_fdt_alloc_phandle(vms->fdt)); ++ g_free(nodename); ++ } ++} ++ ++ ++static void fdt_add_l2cache_nodes(const VirtMachineState *vms) ++{ ++ int i, j; ++ signed int sockets = vms->smp_cpus / smp_cores; ++ ++ /* If current is not equal to max */ ++ if (vms->smp_cpus % smp_cores) ++ sockets++; ++ ++ for (i = 0; i < sockets; i++) { ++ char *next_path = g_strdup_printf("/cpus/l3-cache%d", i); ++ for (j = 0; j < smp_cores; j++) { ++ char *nodename = g_strdup_printf("/cpus/l2-cache%d", ++ i * smp_cores + j); ++ qemu_fdt_add_subnode(vms->fdt, nodename); ++ qemu_fdt_setprop_string(vms->fdt, nodename, "compatible", "cache"); ++ qemu_fdt_setprop_cell(vms->fdt, nodename, "cache-size", 0x80000); ++ qemu_fdt_setprop_cell(vms->fdt, nodename, "cache-line-size", 64); ++ qemu_fdt_setprop_cell(vms->fdt, nodename, "cache-sets", 1024); ++ qemu_fdt_setprop_phandle(vms->fdt, nodename, ++ "next-level-cache", next_path); ++ qemu_fdt_setprop_cell(vms->fdt, nodename, "phandle", ++ qemu_fdt_alloc_phandle(vms->fdt)); ++ g_free(nodename); ++ } ++ g_free(next_path); ++ } ++} ++ ++static void fdt_add_l1cache_prop(const VirtMachineState *vms, ++ char *nodename, int cpu) ++{ ++ char *cachename = g_strdup_printf("/cpus/l2-cache%d", cpu); ++ ++ qemu_fdt_setprop_cell(vms->fdt, nodename, "d-cache-size", 0x10000); ++ qemu_fdt_setprop_cell(vms->fdt, nodename, "d-cache-line-size", 64); ++ qemu_fdt_setprop_cell(vms->fdt, nodename, 
"d-cache-sets", 256); ++ qemu_fdt_setprop_cell(vms->fdt, nodename, "i-cache-size", 0x10000); ++ qemu_fdt_setprop_cell(vms->fdt, nodename, "i-cache-line-size", 64); ++ qemu_fdt_setprop_cell(vms->fdt, nodename, "i-cache-sets", 256); ++ qemu_fdt_setprop_phandle(vms->fdt, nodename, ++ "next-level-cache", cachename); ++ g_free(cachename); ++} ++ ++ + static void fdt_add_cpu_nodes(const VirtMachineState *vms) + { + int cpu; +@@ -336,6 +407,9 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) + qemu_fdt_setprop_cell(vms->fdt, "/cpus", "#address-cells", addr_cells); + qemu_fdt_setprop_cell(vms->fdt, "/cpus", "#size-cells", 0x0); + ++ fdt_add_l3cache_nodes(vms); ++ fdt_add_l2cache_nodes(vms); ++ + for (cpu = vms->smp_cpus - 1; cpu >= 0; cpu--) { + char *nodename = g_strdup_printf("/cpus/cpu@%d", cpu); + ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu)); +@@ -364,7 +438,7 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) + qemu_fdt_setprop_cell(vms->fdt, nodename, "numa-node-id", + ms->possible_cpus->cpus[cs->cpu_index].props.node_id); + } +- ++ fdt_add_l1cache_prop(vms, nodename, cpu); + qemu_fdt_setprop_cell(vms->fdt, nodename, "phandle", + qemu_fdt_alloc_phandle(vms->fdt)); + +diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h +index bfb0b100..0be3453a 100644 +--- a/include/hw/acpi/aml-build.h ++++ b/include/hw/acpi/aml-build.h +@@ -223,6 +223,52 @@ struct AcpiBuildTables { + BIOSLinker *linker; + } AcpiBuildTables; + ++#ifdef __aarch64__ ++/* Definitions of the hardcoded cache info*/ ++ ++typedef enum { ++ ARM_L1D_CACHE, ++ ARM_L1I_CACHE, ++ ARM_L2_CACHE, ++ ARM_L3_CACHE ++} ArmCacheType; ++ ++/* L1 data cache: */ ++#define ARM_L1DCACHE_SIZE 65536 ++#define ARM_L1DCACHE_SET 256 ++#define ARM_L1DCACHE_ASSOCIATIVITY 4 ++#define ARM_L1DCACHE_ATTRIBUTES 2 ++#define ARM_L1DCACHE_LINE_SIZE 64 ++ ++/* L1 instruction cache: */ ++#define ARM_L1ICACHE_SIZE 65536 ++#define ARM_L1ICACHE_SET 256 ++#define ARM_L1ICACHE_ASSOCIATIVITY 4 ++#define 
ARM_L1ICACHE_ATTRIBUTES 4 ++#define ARM_L1ICACHE_LINE_SIZE 64 ++ ++/* Level 2 unified cache: */ ++#define ARM_L2CACHE_SIZE 524288 ++#define ARM_L2CACHE_SET 1024 ++#define ARM_L2CACHE_ASSOCIATIVITY 8 ++#define ARM_L2CACHE_ATTRIBUTES 10 ++#define ARM_L2CACHE_LINE_SIZE 64 ++ ++/* Level 3 unified cache: */ ++#define ARM_L3CACHE_SIZE 33554432 ++#define ARM_L3CACHE_SET 2048 ++#define ARM_L3CACHE_ASSOCIATIVITY 15 ++#define ARM_L3CACHE_ATTRIBUTES 10 ++#define ARM_L3CACHE_LINE_SIZE 128 ++ ++struct offset_status { ++ uint32_t parent; ++ uint32_t l2_offset; ++ uint32_t l1d_offset; ++ uint32_t l1i_offset; ++}; ++ ++#endif + /** + * init_aml_allocator: + * +-- +2.23.0 + diff --git a/i386-acpi-fix-gint-overflow-in-crs_range_compare.patch b/i386-acpi-fix-gint-overflow-in-crs_range_compare.patch new file mode 100644 index 0000000..5f5cbc4 --- /dev/null +++ b/i386-acpi-fix-gint-overflow-in-crs_range_compare.patch @@ -0,0 +1,54 @@ +From 21e2acd583126db94f6d881005cd58e835160582 Mon Sep 17 00:00:00 2001 +From: Evgeny Yakovlev +Date: Thu, 18 Jul 2019 19:14:23 +0300 +Subject: [PATCH] i386/acpi: fix gint overflow in crs_range_compare + +When very large regions (32GB sized in our case, PCI pass-through of GPUs) +are compared, the subtraction result does not fit into gint. + +As a result crs_replace_with_free_ranges does not get sorted ranges and +incorrectly computes PCI64 free space regions. Which then makes linux +guest complain about device and PCI64 hole intersection and device +becomes unusable. + +Fix that by returning exactly fitting ranges. + +Also fix indentation of an entire crs_replace_with_free_ranges to make +checkpatch happy. 
+ +Cc: qemu-stable@nongnu.org +Signed-off-by: Evgeny Yakovlev +Message-Id: <1563466463-26012-1-git-send-email-wrfsh@yandex-team.ru> +Signed-off-by: Evgeny Yakovlev +(cherry-picked from commit 21e2acd583126db94f6d881005cd58e835160582) +--- + hw/i386/acpi-build.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index d281ffa89e..e7b756b51b 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -755,10 +755,16 @@ static void crs_range_set_free(CrsRangeSet *range_set) + + static gint crs_range_compare(gconstpointer a, gconstpointer b) + { +- CrsRangeEntry *entry_a = *(CrsRangeEntry **)a; +- CrsRangeEntry *entry_b = *(CrsRangeEntry **)b; ++ CrsRangeEntry *entry_a = *(CrsRangeEntry **)a; ++ CrsRangeEntry *entry_b = *(CrsRangeEntry **)b; + +- return (int64_t)entry_a->base - (int64_t)entry_b->base; ++ if (entry_a->base < entry_b->base) { ++ return -1; ++ } else if (entry_a->base > entry_b->base) { ++ return 1; ++ } else { ++ return 0; ++ } + } + + /* +-- +2.19.1 + diff --git a/i386-acpi-show-PCI-Express-bus-on-pxb-pcie-expanders.patch b/i386-acpi-show-PCI-Express-bus-on-pxb-pcie-expanders.patch new file mode 100644 index 0000000..6403298 --- /dev/null +++ b/i386-acpi-show-PCI-Express-bus-on-pxb-pcie-expanders.patch @@ -0,0 +1,40 @@ +From ee4b0c8686f781987879508d7c6dd605b5435bac Mon Sep 17 00:00:00 2001 +From: Evgeny Yakovlev +Date: Fri, 19 Jul 2019 11:54:29 +0300 +Subject: [PATCH] i386/acpi: show PCI Express bus on pxb-pcie expanders + +Show PCIe host bridge PNP id with PCI host bridge as a compatible id +when expanding a pcie bus. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Evgeny Yakovlev +Message-Id: <1563526469-15588-1-git-send-email-wrfsh@yandex-team.ru> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry-picked from commit ee4b0c8686f781987879508d7c6dd605b5435bac) +--- + hw/i386/acpi-build.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index e7b756b51b..f3fdfefcd5 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -1914,10 +1914,13 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, + scope = aml_scope("\\_SB"); + dev = aml_device("PC%.02X", bus_num); + aml_append(dev, aml_name_decl("_UID", aml_int(bus_num))); +- aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); + aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num))); + if (pci_bus_is_express(bus)) { ++ aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08"))); ++ aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); + aml_append(dev, build_q35_osc_method()); ++ } else { ++ aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); + } + + if (numa_node != NUMA_NODE_UNASSIGNED) { +-- +2.19.1 + diff --git a/ioapic-kvm-Skip-route-updates-for-masked-pins.patch b/ioapic-kvm-Skip-route-updates-for-masked-pins.patch new file mode 100644 index 0000000..0a27bfd --- /dev/null +++ b/ioapic-kvm-Skip-route-updates-for-masked-pins.patch @@ -0,0 +1,46 @@ +From be1927c97e564346cbd409cb17fe611df74b84e5 Mon Sep 17 00:00:00 2001 +From: Jan Kiszka +Date: Sun, 2 Jun 2019 13:42:13 +0200 +Subject: [PATCH] ioapic: kvm: Skip route updates for masked pins + +Masked entries will not generate interrupt messages, thus do not need to +be routed by KVM. This is a cosmetic cleanup, just avoiding warnings of +the kind + +qemu-system-x86_64: vtd_irte_get: detected non-present IRTE (index=0, high=0xff00, low=0x100) + +if the masked entry happens to reference a non-present IRTE. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Jan Kiszka +Message-Id: +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Reviewed-by: Peter Xu +(cherry-picked from commit be1927c97e564346cbd409cb17fe611df74b84e5) +--- + hw/intc/ioapic.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c +index c408749876..e99c37cceb 100644 +--- a/hw/intc/ioapic.c ++++ b/hw/intc/ioapic.c +@@ -197,9 +197,11 @@ static void ioapic_update_kvm_routes(IOAPICCommonState *s) + MSIMessage msg; + struct ioapic_entry_info info; + ioapic_entry_parse(s->ioredtbl[i], &info); +- msg.address = info.addr; +- msg.data = info.data; +- kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL); ++ if (!info.masked) { ++ msg.address = info.addr; ++ msg.data = info.data; ++ kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL); ++ } + } + kvm_irqchip_commit_routes(kvm_state); + } +-- +2.19.1 + diff --git a/kbd-state-fix-autorepeat-handling.patch b/kbd-state-fix-autorepeat-handling.patch new file mode 100644 index 0000000..aa1bef3 --- /dev/null +++ b/kbd-state-fix-autorepeat-handling.patch @@ -0,0 +1,40 @@ +From 5fff13f245cddd3bc260dfe6ebe1b1f05b72116f Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Tue, 14 May 2019 06:24:43 +0200 +Subject: [PATCH] kbd-state: fix autorepeat handling + +When allowing multiple down-events in a row (key autorepeat) we can't +use change_bit() any more to update the state, because autorepeat events +don't change the key state. We have to explicitly use set_bit() and +clear_bit() instead. 
+ +Cc: qemu-stable@nongnu.org +Fixes: 35921860156e kbd-state: don't block auto-repeat events +Buglink: https://bugs.launchpad.net/qemu/+bug/1828272 +Signed-off-by: Gerd Hoffmann +Message-id: 20190514042443.10735-1-kraxel@redhat.com +(cherry-picked from commit 5fff13f245cddd3bc260dfe6ebe1b1f05b72116f) +--- + ui/kbd-state.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/ui/kbd-state.c b/ui/kbd-state.c +index f3ab2d7a66..1668d17dda 100644 +--- a/ui/kbd-state.c ++++ b/ui/kbd-state.c +@@ -59,7 +59,11 @@ void qkbd_state_key_event(QKbdState *kbd, QKeyCode qcode, bool down) + } + + /* update key and modifier state */ +- change_bit(qcode, kbd->keys); ++ if (down) { ++ set_bit(qcode, kbd->keys); ++ } else { ++ clear_bit(qcode, kbd->keys); ++ } + switch (qcode) { + case Q_KEY_CODE_SHIFT: + case Q_KEY_CODE_SHIFT_R: +-- +2.19.1 + diff --git a/megasas-fix-mapped-frame-size.patch b/megasas-fix-mapped-frame-size.patch new file mode 100644 index 0000000..46cc8d3 --- /dev/null +++ b/megasas-fix-mapped-frame-size.patch @@ -0,0 +1,35 @@ +From 2e56fbc87f6ec3cd56c37b01d313abd502b80d61 Mon Sep 17 00:00:00 2001 +From: Peter Lieven +Date: Thu, 4 Apr 2019 14:10:15 +0200 +Subject: [PATCH] megasas: fix mapped frame size + +the current value of 1024 bytes (16 * MFI_FRAME_SIZE) we map is not enough to hold +the maximum number of scatter gather elements we advertise. We actually need a +maximum of 2048 bytes. This is 128 max sg elements * 16 bytes (sizeof (union mfi_sgl)). 
+ +Cc: qemu-stable@nongnu.org +Signed-off-by: Peter Lieven +Message-Id: <20190404121015.28634-1-pl@kamp.de> +Reviewed-by: Hannes Reinecke +Signed-off-by: Paolo Bonzini +(cherry-picked from commit 2e56fbc87f6ec3cd56c37b01d313abd502b80d61) +--- + hw/scsi/megasas.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c +index a56317e026..5ad762de23 100644 +--- a/hw/scsi/megasas.c ++++ b/hw/scsi/megasas.c +@@ -477,7 +477,7 @@ static MegasasCmd *megasas_enqueue_frame(MegasasState *s, + { + PCIDevice *pcid = PCI_DEVICE(s); + MegasasCmd *cmd = NULL; +- int frame_size = MFI_FRAME_SIZE * 16; ++ int frame_size = MEGASAS_MAX_SGE * sizeof(union mfi_sgl); + hwaddr frame_size_p = frame_size; + unsigned long index; + +-- +2.19.1 + diff --git a/memory-unref-the-memory-region-in-simplify-flatview.patch b/memory-unref-the-memory-region-in-simplify-flatview.patch new file mode 100644 index 0000000..eb19964 --- /dev/null +++ b/memory-unref-the-memory-region-in-simplify-flatview.patch @@ -0,0 +1,85 @@ +From b9f43f0cca03586a31b53e47ade72e77db01cb4c Mon Sep 17 00:00:00 2001 +From: King Wang +Date: Fri, 12 Jul 2019 14:52:41 +0800 +Subject: [PATCH 2/5] memory: unref the memory region in simplify flatview + +The memory region reference is increased when insert a range +into flatview range array, then decreased by destroy flatview. +If some flat range merged by flatview_simplify, the memory region +reference can not be decreased by destroy flatview any more. 
+ +In this case, start virtual machine by the command line: +qemu-system-x86_64 +-name guest=ubuntu,debug-threads=on +-machine pc,accel=kvm,usb=off,dump-guest-core=off +-cpu host +-m 16384 +-realtime mlock=off +-smp 8,sockets=2,cores=4,threads=1 +-object memory-backend-file,id=ram-node0,prealloc=yes,mem-path=/dev/hugepages,share=yes,size=8589934592 +-numa node,nodeid=0,cpus=0-3,memdev=ram-node0 +-object memory-backend-file,id=ram-node1,prealloc=yes,mem-path=/dev/hugepages,share=yes,size=8589934592 +-numa node,nodeid=1,cpus=4-7,memdev=ram-node1 +-no-user-config +-nodefaults +-rtc base=utc +-no-shutdown +-boot strict=on +-device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 +-device virtio-scsi-pci,id=scsi0,bus=pci.0,addr=0x2 +-device virtio-serial-pci,id=virtio-serial0,bus=pci.0,addr=0x3 +-drive file=ubuntu.qcow2,format=qcow2,if=none,id=drive-virtio-disk0,cache=none,aio=native +-device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1 +-chardev pty,id=charserial0 +-device isa-serial,chardev=charserial0,id=serial0 +-device usb-tablet,id=input0,bus=usb.0,port=1 +-vnc 0.0.0.0:0 +-device VGA,id=video0,vgamem_mb=16,bus=pci.0,addr=0x5 +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x6 +-msg timestamp=on + +And run the script in guest OS: +while true +do + setpci -s 00:06.0 04.b=03 + setpci -s 00:06.0 04.b=07 +done + +I found the reference of node0 HostMemoryBackendFile is a big one. 
+(gdb) p numa_info[0]->node_memdev->parent.ref +$6 = 1636278 +(gdb) + +Signed-off-by: King Wang +Message-Id: <20190712065241.11784-1-king.wang@huawei.com> +Signed-off-by: Paolo Bonzini +--- + memory.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/memory.c b/memory.c +index 9fbca52..0b49281 100644 +--- a/memory.c ++++ b/memory.c +@@ -320,7 +320,7 @@ static bool can_merge(FlatRange *r1, FlatRange *r2) + /* Attempt to simplify a view by merging adjacent ranges */ + static void flatview_simplify(FlatView *view) + { +- unsigned i, j; ++ unsigned i, j, k; + + i = 0; + while (i < view->nr) { +@@ -331,6 +331,9 @@ static void flatview_simplify(FlatView *view) + ++j; + } + ++i; ++ for (k = i; k < j; k++) { ++ memory_region_unref(view->ranges[k].mr); ++ } + memmove(&view->ranges[i], &view->ranges[j], + (view->nr - j) * sizeof(view->ranges[j])); + view->nr -= j - i; +-- +1.8.3.1 + diff --git a/pl011-reset-read-FIFO-when-UARTTIMSC-0-UARTICR-0xfff.patch b/pl011-reset-read-FIFO-when-UARTTIMSC-0-UARTICR-0xfff.patch new file mode 100644 index 0000000..b375c20 --- /dev/null +++ b/pl011-reset-read-FIFO-when-UARTTIMSC-0-UARTICR-0xfff.patch @@ -0,0 +1,41 @@ +From f995e8b5e5c14f83a16433f192440ec5c82c87fa Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Mon, 29 Jul 2019 16:16:35 +0800 +Subject: [PATCH] pl011: reset read FIFO when UARTTIMSC=0 & UARTICR=0xffff + +We can enable ACPI when AArch64 Linux is booted with QEMU and UEFI (AAVMF). +When VM is booting and the SBSA driver has not initialized, writing data +that exceeds 32 bytes will cause the read FIFO full and proceeding data will +be lost. The serial port appears to be stuck in this abnormal situation. + +A hack to reset read FIFO when UARTTIMSC=0 & UARTICR=0xffff appears to +resolve the issue. 
+ +The question is fully discussed at +https://www.spinics.net/lists/linux-serial/msg23163.html + +Signed-off-by: Haibin Wang +Reviewed-by: Shannon Zhao +Reviewed-by: Ying Fang +--- + hw/char/pl011.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/char/pl011.c b/hw/char/pl011.c +index e5dd448f..899745ef 100644 +--- a/hw/char/pl011.c ++++ b/hw/char/pl011.c +@@ -223,6 +223,10 @@ static void pl011_write(void *opaque, hwaddr offset, + case 17: /* UARTICR */ + s->int_level &= ~value; + pl011_update(s); ++ if (!s->int_enabled && !s->int_level) { ++ s->read_count = 0; ++ s->read_pos = 0; ++ } + break; + case 18: /* UARTDMACR */ + s->dmacr = value; +-- +2.19.1 + diff --git a/pl031-support-rtc-timer-property-for-pl031.patch b/pl031-support-rtc-timer-property-for-pl031.patch new file mode 100644 index 0000000..7bd977a --- /dev/null +++ b/pl031-support-rtc-timer-property-for-pl031.patch @@ -0,0 +1,49 @@ +From 68d4653b5ffde629e9b05d5de13b6adcde9d153b Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Mon, 29 Jul 2019 16:20:51 +0800 +Subject: [PATCH] pl031: support rtc-timer property for pl031 + +This patch adds the rtc-timer property for pl031, we can get the +rtc time (UTC) through qmp command "qom-get date" with this property. 
+ +Signed-off-by: Haibin Wang +Reviewed-by: Shannon Zhao +Reviewed-by: Ying Fang +--- + hw/timer/pl031.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/hw/timer/pl031.c b/hw/timer/pl031.c +index 274ad47a..04331472 100644 +--- a/hw/timer/pl031.c ++++ b/hw/timer/pl031.c +@@ -57,6 +57,15 @@ static uint32_t pl031_get_count(PL031State *s) + return s->tick_offset + now / NANOSECONDS_PER_SECOND; + } + ++static void pl031_get_date(Object *obj, struct tm *current_tm, Error **errp) ++{ ++ PL031State *s = PL031(obj); ++ time_t ti = pl031_get_count(s); ++ ++ /* Changed to UTC time */ ++ gmtime_r(&ti, current_tm); ++} ++ + static void pl031_set_alarm(PL031State *s) + { + uint32_t ticks; +@@ -191,6 +200,10 @@ static void pl031_init(Object *obj) + qemu_clock_get_ns(rtc_clock) / NANOSECONDS_PER_SECOND; + + s->timer = timer_new_ns(rtc_clock, pl031_interrupt, s); ++ ++ object_property_add_tm(OBJECT(s), "date", pl031_get_date, NULL); ++ object_property_add_alias(qdev_get_machine(), "rtc-time", ++ OBJECT(s), "date", NULL); + } + + static int pl031_pre_save(void *opaque) +-- +2.19.1 + diff --git a/q35-Revert-to-kernel-irqchip.patch b/q35-Revert-to-kernel-irqchip.patch new file mode 100644 index 0000000..084217a --- /dev/null +++ b/q35-Revert-to-kernel-irqchip.patch @@ -0,0 +1,132 @@ +From c87759ce876a7a0b17c2bf4f0b964bd51f0ee871 Mon Sep 17 00:00:00 2001 +From: Alex Williamson +Date: Tue, 14 May 2019 14:14:41 -0600 +Subject: [PATCH] q35: Revert to kernel irqchip + +Commit b2fc91db8447 ("q35: set split kernel irqchip as default") changed +the default for the pc-q35-4.0 machine type to use split irqchip, which +turned out to have disasterous effects on vfio-pci INTx support. KVM +resampling irqfds are registered for handling these interrupts, but +these are non-functional in split irqchip mode. 
We can't simply test +for split irqchip in QEMU as userspace handling of this interrupt is a +significant performance regression versus KVM handling (GeForce GPUs +assigned to Windows VMs are non-functional without forcing MSI mode or +re-enabling kernel irqchip). + +The resolution is to revert the change in default irqchip mode in the +pc-q35-4.1 machine and create a pc-q35-4.0.1 machine for the 4.0-stable +branch. The qemu-q35-4.0 machine type should not be used in vfio-pci +configurations for devices requiring legacy INTx support without +explicitly modifying the VM configuration to use kernel irqchip. + +Link: https://bugs.launchpad.net/qemu/+bug/1826422 +Fixes: b2fc91db8447 ("q35: set split kernel irqchip as default") +Signed-off-by: Alex Williamson +Reviewed-by: Peter Xu +Message-Id: <155786484688.13873.6037015630912983760.stgit@gimli.home> +Signed-off-by: Paolo Bonzini +(cherry-picked from commit c87759ce876a7a0b17c2bf4f0b964bd51f0ee871) +--- + hw/core/machine.c | 3 +++ + hw/i386/pc.c | 3 +++ + hw/i386/pc_q35.c | 16 ++++++++++++++-- + include/hw/boards.h | 3 +++ + include/hw/i386/pc.h | 3 +++ + 5 files changed, 26 insertions(+), 2 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 16ba667434..f1a0f45f9c 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -24,6 +24,9 @@ + #include "hw/pci/pci.h" + #include "hw/mem/nvdimm.h" + ++GlobalProperty hw_compat_4_0_1[] = {}; ++const size_t hw_compat_4_0_1_len = G_N_ELEMENTS(hw_compat_4_0_1); ++ + GlobalProperty hw_compat_4_0[] = {}; + const size_t hw_compat_4_0_len = G_N_ELEMENTS(hw_compat_4_0); + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 2632b73f80..edc240bcbf 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -110,6 +110,9 @@ struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX}; + /* Physical Address of PVH entry point read from kernel ELF NOTE */ + static size_t pvh_start_addr; + ++GlobalProperty pc_compat_4_0_1[] = {}; ++const size_t pc_compat_4_0_1_len = 
G_N_ELEMENTS(pc_compat_4_0_1); ++ + GlobalProperty pc_compat_4_0[] = {}; + const size_t pc_compat_4_0_len = G_N_ELEMENTS(pc_compat_4_0); + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 37dd350511..dcddc64662 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -357,7 +357,7 @@ static void pc_q35_machine_options(MachineClass *m) + m->units_per_default_bus = 1; + m->default_machine_opts = "firmware=bios-256k.bin"; + m->default_display = "std"; +- m->default_kernel_irqchip_split = true; ++ m->default_kernel_irqchip_split = false; + m->no_floppy = 1; + machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE); + machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); +@@ -374,10 +374,22 @@ static void pc_q35_4_1_machine_options(MachineClass *m) + DEFINE_Q35_MACHINE(v4_1, "pc-q35-4.1", NULL, + pc_q35_4_1_machine_options); + +-static void pc_q35_4_0_machine_options(MachineClass *m) ++static void pc_q35_4_0_1_machine_options(MachineClass *m) + { + pc_q35_4_1_machine_options(m); + m->alias = NULL; ++ compat_props_add(m->compat_props, hw_compat_4_0_1, hw_compat_4_0_1_len); ++ compat_props_add(m->compat_props, pc_compat_4_0_1, pc_compat_4_0_1_len); ++} ++ ++DEFINE_Q35_MACHINE(v4_0_1, "pc-q35-4.0.1", NULL, ++ pc_q35_4_0_1_machine_options); ++ ++static void pc_q35_4_0_machine_options(MachineClass *m) ++{ ++ pc_q35_4_0_1_machine_options(m); ++ m->default_kernel_irqchip_split = true; ++ m->alias = NULL; + compat_props_add(m->compat_props, hw_compat_4_0, hw_compat_4_0_len); + compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len); + } +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 6f7916f88f..6ff02bf3e4 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -292,6 +292,9 @@ struct MachineState { + } \ + type_init(machine_initfn##_register_types) + ++extern GlobalProperty hw_compat_4_0_1[]; ++extern const size_t hw_compat_4_0_1_len; ++ + extern GlobalProperty hw_compat_4_0[]; + extern const size_t 
hw_compat_4_0_len; + +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 43df7230a2..5d5636241e 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -293,6 +293,9 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t); + int e820_get_num_entries(void); + bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); + ++extern GlobalProperty pc_compat_4_0_1[]; ++extern const size_t pc_compat_4_0_1_len; ++ + extern GlobalProperty pc_compat_4_0[]; + extern const size_t pc_compat_4_0_len; + +-- +2.19.1 + diff --git a/qcow2-Add-errp-to-preallocate_co.patch b/qcow2-Add-errp-to-preallocate_co.patch new file mode 100644 index 0000000..0b3dc71 --- /dev/null +++ b/qcow2-Add-errp-to-preallocate_co.patch @@ -0,0 +1,79 @@ +From 360bd07471dfd1830246e8403ffdc9ba9d82f9d4 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 15 Apr 2019 16:56:07 +0200 +Subject: [PATCH] qcow2: Add errp to preallocate_co() + +We'll add a bdrv_co_truncate() call in the next patch which can return +an Error that we don't want to discard. So add an errp parameter to +preallocate_co(). + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +(cherry-picked from commit 360bd07471dfd1830246e8403ffdc9ba9d82f9d4) +--- + block/qcow2.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/block/qcow2.c b/block/qcow2.c +index dfac74c264..b4f9f5a240 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -2721,7 +2721,7 @@ static int qcow2_set_up_encryption(BlockDriverState *bs, + * Returns: 0 on success, -errno on failure. 
+ */ + static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset, +- uint64_t new_length) ++ uint64_t new_length, Error **errp) + { + BDRVQcow2State *s = bs->opaque; + uint64_t bytes; +@@ -2738,6 +2738,7 @@ static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset, + ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes, + &host_offset, &meta); + if (ret < 0) { ++ error_setg_errno(errp, -ret, "Allocating clusters failed"); + return ret; + } + +@@ -2746,6 +2747,7 @@ static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset, + + ret = qcow2_alloc_cluster_link_l2(bs, meta); + if (ret < 0) { ++ error_setg_errno(errp, -ret, "Mapping clusters failed"); + qcow2_free_any_clusters(bs, meta->alloc_offset, + meta->nb_clusters, QCOW2_DISCARD_NEVER); + return ret; +@@ -2775,6 +2777,7 @@ static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset, + ret = bdrv_pwrite(s->data_file, (host_offset + cur_bytes) - 1, + &data, 1); + if (ret < 0) { ++ error_setg_errno(errp, -ret, "Writing to EOF failed"); + return ret; + } + } +@@ -3748,9 +3751,8 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, + break; + + case PREALLOC_MODE_METADATA: +- ret = preallocate_co(bs, old_length, offset); ++ ret = preallocate_co(bs, old_length, offset, errp); + if (ret < 0) { +- error_setg_errno(errp, -ret, "Preallocation failed"); + goto fail; + } + break; +@@ -3766,9 +3768,8 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, + /* With a data file, preallocation means just allocating the metadata + * and forwarding the truncate request to the data file */ + if (has_data_file(bs)) { +- ret = preallocate_co(bs, old_length, offset); ++ ret = preallocate_co(bs, old_length, offset, errp); + if (ret < 0) { +- error_setg_errno(errp, -ret, "Preallocation failed"); + goto fail; + } + break; +-- +2.19.1 + diff --git a/qcow2-Avoid-COW-during-metadata-preallocation.patch 
b/qcow2-Avoid-COW-during-metadata-preallocation.patch new file mode 100644 index 0000000..483d119 --- /dev/null +++ b/qcow2-Avoid-COW-during-metadata-preallocation.patch @@ -0,0 +1,38 @@ +From f29fbf7c6b1c9a84f6931c1c222716fbe073e6e4 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 15 Apr 2019 16:25:01 +0200 +Subject: [PATCH] qcow2: Avoid COW during metadata preallocation + +Limiting the allocation to INT_MAX bytes isn't particularly clever +because it means that the final cluster will be a partial cluster which +will be completed through a COW operation. This results in unnecessary +data read and write requests which lead to an unwanted non-sparse +filesystem block for metadata preallocation. + +Align the maximum allocation size down to the cluster size to avoid this +situation. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +(cherry-picked from commit f29fbf7c6b1c9a84f6931c1c222716fbe073e6e4) +--- + block/qcow2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/qcow2.c b/block/qcow2.c +index 3ace3b2209..dfac74c264 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -2734,7 +2734,7 @@ static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset, + bytes = new_length - offset; + + while (bytes) { +- cur_bytes = MIN(bytes, INT_MAX); ++ cur_bytes = MIN(bytes, QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size)); + ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes, + &host_offset, &meta); + if (ret < 0) { +-- +2.19.1 + diff --git a/qcow2-Fix-full-preallocation-with-external-data-file.patch b/qcow2-Fix-full-preallocation-with-external-data-file.patch new file mode 100644 index 0000000..12ad521 --- /dev/null +++ b/qcow2-Fix-full-preallocation-with-external-data-file.patch @@ -0,0 +1,116 @@ +From 718c0fce2f56755a8d8f737607779a98aa6e7cc4 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 15 Apr 2019 16:34:30 +0200 +Subject: [PATCH] qcow2: Fix full preallocation with external data file + 
+preallocate_co() already gave the data file the full size without +forwarding the requested preallocation mode to the protocol. When +bdrv_co_truncate() was called later with the preallocation mode, the +file didn't actually grow any more, so the data file stayed unallocated +even if full preallocation was requested. + +Pass the right preallocation mode to preallocate_co() and remove the +second bdrv_co_truncate() to fix this. As a side effect, the ugly +one-byte write in preallocate_co() is replaced with a truncate call, +now leaving the last block unallocated on the protocol level as it +should be. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +(cherry-picked from commit 718c0fce2f56755a8d8f737607779a98aa6e7cc4) +--- + block/qcow2.c | 41 +++++++++++++++++++++++------------------ + 1 file changed, 23 insertions(+), 18 deletions(-) + +diff --git a/block/qcow2.c b/block/qcow2.c +index b4f9f5a240..7fbef97aab 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -2721,11 +2721,13 @@ static int qcow2_set_up_encryption(BlockDriverState *bs, + * Returns: 0 on success, -errno on failure. + */ + static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset, +- uint64_t new_length, Error **errp) ++ uint64_t new_length, PreallocMode mode, ++ Error **errp) + { + BDRVQcow2State *s = bs->opaque; + uint64_t bytes; + uint64_t host_offset = 0; ++ int64_t file_length; + unsigned int cur_bytes; + int ret; + QCowL2Meta *meta; +@@ -2772,12 +2774,19 @@ static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset, + * all of the allocated clusters (otherwise we get failing reads after + * EOF). Extend the image to the last allocated sector. 
+ */ +- if (host_offset != 0) { +- uint8_t data = 0; +- ret = bdrv_pwrite(s->data_file, (host_offset + cur_bytes) - 1, +- &data, 1); ++ file_length = bdrv_getlength(s->data_file->bs); ++ if (file_length < 0) { ++ error_setg_errno(errp, -file_length, "Could not get file size"); ++ return file_length; ++ } ++ ++ if (host_offset + cur_bytes > file_length) { ++ if (mode == PREALLOC_MODE_METADATA) { ++ mode = PREALLOC_MODE_OFF; ++ } ++ ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, mode, ++ errp); + if (ret < 0) { +- error_setg_errno(errp, -ret, "Writing to EOF failed"); + return ret; + } + } +@@ -3748,10 +3757,16 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, + + switch (prealloc) { + case PREALLOC_MODE_OFF: ++ if (has_data_file(bs)) { ++ ret = bdrv_co_truncate(s->data_file, offset, prealloc, errp); ++ if (ret < 0) { ++ goto fail; ++ } ++ } + break; + + case PREALLOC_MODE_METADATA: +- ret = preallocate_co(bs, old_length, offset, errp); ++ ret = preallocate_co(bs, old_length, offset, prealloc, errp); + if (ret < 0) { + goto fail; + } +@@ -3768,7 +3783,7 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, + /* With a data file, preallocation means just allocating the metadata + * and forwarding the truncate request to the data file */ + if (has_data_file(bs)) { +- ret = preallocate_co(bs, old_length, offset, errp); ++ ret = preallocate_co(bs, old_length, offset, prealloc, errp); + if (ret < 0) { + goto fail; + } +@@ -3883,16 +3898,6 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, + + bs->total_sectors = offset / BDRV_SECTOR_SIZE; + +- if (has_data_file(bs)) { +- if (prealloc == PREALLOC_MODE_METADATA) { +- prealloc = PREALLOC_MODE_OFF; +- } +- ret = bdrv_co_truncate(s->data_file, offset, prealloc, errp); +- if (ret < 0) { +- goto fail; +- } +- } +- + /* write updated header.size */ + offset = cpu_to_be64(offset); + ret = bdrv_pwrite_sync(bs->file, 
offsetof(QCowHeader, size), +-- +2.19.1 + diff --git a/qcow2-Fix-qcow2_make_empty-with-external-data-file.patch b/qcow2-Fix-qcow2_make_empty-with-external-data-file.patch new file mode 100644 index 0000000..636e6e7 --- /dev/null +++ b/qcow2-Fix-qcow2_make_empty-with-external-data-file.patch @@ -0,0 +1,49 @@ +From db04524f820582ebf1189223b6378de238511da1 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 29 Apr 2019 12:52:21 +0200 +Subject: [PATCH] qcow2: Fix qcow2_make_empty() with external data file + +make_completely_empty() is an optimisated path for bdrv_make_empty() +where completely new metadata is created inside the image file instead +of going through all clusters and discarding them. For an external data +file, however, we actually need to do discard operations on the data +file; just overwriting the qcow2 file doesn't get rid of the data. + +The necessary slow path with an explicit discard operation already +exists for other cases. Use it for external data files, too. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +(cherry-picked from commit db04524f820582ebf1189223b6378de238511da1) +--- + block/qcow2.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/block/qcow2.c b/block/qcow2.c +index 7fbef97aab..840f289a48 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -4384,14 +4384,17 @@ static int qcow2_make_empty(BlockDriverState *bs) + + if (s->qcow_version >= 3 && !s->snapshots && !s->nb_bitmaps && + 3 + l1_clusters <= s->refcount_block_size && +- s->crypt_method_header != QCOW_CRYPT_LUKS) { ++ s->crypt_method_header != QCOW_CRYPT_LUKS && ++ !has_data_file(bs)) { + /* The following function only works for qcow2 v3 images (it + * requires the dirty flag) and only as long as there are no + * features that reserve extra clusters (such as snapshots, + * LUKS header, or persistent bitmaps), because it completely + * empties the image. 
Furthermore, the L1 table and three + * additional clusters (image header, refcount table, one +- * refcount block) have to fit inside one refcount block. */ ++ * refcount block) have to fit inside one refcount block. It ++ * only resets the image file, i.e. does not work with an ++ * external data file. */ + return make_completely_empty(bs); + } + +-- +2.19.1 + diff --git a/qcow2-fix-memory-leak-in-qcow2_read_extensions.patch b/qcow2-fix-memory-leak-in-qcow2_read_extensions.patch new file mode 100644 index 0000000..2837a02 --- /dev/null +++ b/qcow2-fix-memory-leak-in-qcow2_read_extensions.patch @@ -0,0 +1,27 @@ +From 76ab77108279f9d328e4a7fe1684141084698d97 Mon Sep 17 00:00:00 2001 +From: zhanghailiang +Date: Thu, 25 Jul 2019 16:05:11 +0800 +Subject: [PATCH] qcow2: fix memory leak in qcow2_read_extensions + +Free feature_table if it is failed in bdrv_pread. + +Signed-off-by: fangyi +--- + block/qcow2.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/block/qcow2.c b/block/qcow2.c +index 3ace3b22..5e85cf4b 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -258,6 +258,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, + void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature)); + ret = bdrv_pread(bs->file, offset , feature_table, ext.len); + if (ret < 0) { ++ g_free(feature_table); + error_setg_errno(errp, -ret, "ERROR: ext_feature_table: " + "Could not read table"); + return ret; +-- +2.19.1 + diff --git a/qemu-4.0.0.tar.xz b/qemu-4.0.0.tar.xz new file mode 100644 index 0000000..dd626a4 Binary files /dev/null and b/qemu-4.0.0.tar.xz differ diff --git a/qemu-bridge-helper-move-repeating-code-in-parse_acl.patch b/qemu-bridge-helper-move-repeating-code-in-parse_acl.patch new file mode 100644 index 0000000..8cd599a --- /dev/null +++ b/qemu-bridge-helper-move-repeating-code-in-parse_acl.patch @@ -0,0 +1,75 @@ +From 3283dde4b5b5cce0f96f48d536bebff66d97ce0b Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Tue, 23 
Jul 2019 16:17:53 +0530 +Subject: [PATCH 2/2] qemu-bridge-helper: move repeating code in parse_acl_file +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Move repeating error handling sequence in parse_acl_file routine +to an 'err' label. + +This patch fixes CVE-2019-13164. + +Signed-off-by: Prasad J Pandit +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Li Qiang +Signed-off-by: Jason Wang +(cherry-picked from commit 3283dde4b5b5cce0f96f48d536bebff66d97ce0b) +--- + qemu-bridge-helper.c | 19 +++++++++---------- + 1 file changed, 9 insertions(+), 10 deletions(-) + +diff --git a/qemu-bridge-helper.c b/qemu-bridge-helper.c +index 2058e10454..3d50ec094c 100644 +--- a/qemu-bridge-helper.c ++++ b/qemu-bridge-helper.c +@@ -102,9 +102,7 @@ static int parse_acl_file(const char *filename, ACLList *acl_list) + + if (arg == NULL) { + fprintf(stderr, "Invalid config line:\n %s\n", line); +- fclose(f); +- errno = EINVAL; +- return -1; ++ goto err; + } + + *arg = 0; +@@ -121,9 +119,7 @@ static int parse_acl_file(const char *filename, ACLList *acl_list) + + if (!g_str_equal(cmd, "include") && strlen(arg) >= IFNAMSIZ) { + fprintf(stderr, "name `%s' too long: %zu\n", arg, strlen(arg)); +- fclose(f); +- errno = EINVAL; +- return -1; ++ goto err; + } + + if (strcmp(cmd, "deny") == 0) { +@@ -149,15 +145,18 @@ static int parse_acl_file(const char *filename, ACLList *acl_list) + parse_acl_file(arg, acl_list); + } else { + fprintf(stderr, "Unknown command `%s'\n", cmd); +- fclose(f); +- errno = EINVAL; +- return -1; ++ goto err; + } + } + + fclose(f); +- + return 0; ++ ++err: ++ fclose(f); ++ errno = EINVAL; ++ return -1; ++ + } + + static bool has_vnet_hdr(int fd) +-- +2.19.1 + diff --git a/qemu-bridge-helper-restrict-interface-name-to-IFNAMS.patch b/qemu-bridge-helper-restrict-interface-name-to-IFNAMS.patch new file mode 100644 index 0000000..b6dc25e --- /dev/null +++ 
b/qemu-bridge-helper-restrict-interface-name-to-IFNAMS.patch @@ -0,0 +1,60 @@ +From 6f5d8671225dc77190647f18a27a0d156d4ca97a Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Tue, 23 Jul 2019 16:17:52 +0530 +Subject: [PATCH 1/2] qemu-bridge-helper: restrict interface name to IFNAMSIZ +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The network interface name in Linux is defined to be of size +IFNAMSIZ(=16), including the terminating null('\0') byte. +The same is applied to interface names read from 'bridge.conf' +file to form ACL rules. If user supplied '--br=bridge' name +is not restricted to the same length, it could lead to ACL bypass +issue. Restrict interface name to IFNAMSIZ, including null byte. + +This patch fixes CVE-2019-13164. + +Reported-by: Riccardo Schirone +Signed-off-by: Prasad J Pandit +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Li Qiang +Signed-off-by: Jason Wang +(cherry-picked from commit 6f5d8671225dc77190647f18a27a0d156d4ca97a) +--- + qemu-bridge-helper.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/qemu-bridge-helper.c b/qemu-bridge-helper.c +index 95624bc300..2058e10454 100644 +--- a/qemu-bridge-helper.c ++++ b/qemu-bridge-helper.c +@@ -119,6 +119,13 @@ static int parse_acl_file(const char *filename, ACLList *acl_list) + } + *argend = 0; + ++ if (!g_str_equal(cmd, "include") && strlen(arg) >= IFNAMSIZ) { ++ fprintf(stderr, "name `%s' too long: %zu\n", arg, strlen(arg)); ++ fclose(f); ++ errno = EINVAL; ++ return -1; ++ } ++ + if (strcmp(cmd, "deny") == 0) { + acl_rule = g_malloc(sizeof(*acl_rule)); + if (strcmp(arg, "all") == 0) { +@@ -269,6 +276,10 @@ int main(int argc, char **argv) + usage(); + return EXIT_FAILURE; + } ++ if (strlen(bridge) >= IFNAMSIZ) { ++ fprintf(stderr, "name `%s' too long: %zu\n", bridge, strlen(bridge)); ++ return EXIT_FAILURE; ++ } + + /* parse default acl file */ + QSIMPLEQ_INIT(&acl_list); +-- +2.19.1 + 
diff --git a/qemu.spec b/qemu.spec new file mode 100644 index 0000000..259ab25 --- /dev/null +++ b/qemu.spec @@ -0,0 +1,439 @@ +Name: qemu +Version: 4.0.0 +Release: 3 +Epoch: 2 +Summary: QEMU is a generic and open source machine emulator and virtualizer +License: GPLv2 and BSD and MIT and CC-BY +URL: http://www.qemu.org +Source0: https://www.qemu.org/download/%{name}-%{version}%{?rcstr}.tar.xz +Source1: 99-qemu-guest-agent.rules +Source2: bridge.conf + +Patch0001: qxl-check-release-info-object.patch +Patch0002: target-i386-define-md-clear-bit.patch +Patch0003: Revert-Enable-build-and-install-of-our-rST-docs.patch +Patch0004: ARM64-record-vtimer-tick-when-cpu-is-stopped.patch +Patch0005: pl011-reset-read-FIFO-when-UARTTIMSC-0-UARTICR-0xfff.patch +Patch0006: pl031-support-rtc-timer-property-for-pl031.patch +Patch0007: vhost-cancel-migration-when-vhost-user-restarted.patch +Patch0008: qcow2-fix-memory-leak-in-qcow2_read_extensions.patch +Patch0009: hw-arm-expose-host-CPU-frequency-info-to-guest.patch +Patch0010: block-Fix-AioContext-switch-for-bs-drv-NULL.patch +Patch0011: cutils-Fix-size_to_str-on-32-bit-platforms.patch +Patch0012: qcow2-Avoid-COW-during-metadata-preallocation.patch +Patch0013: qcow2-Add-errp-to-preallocate_co.patch +Patch0014: qcow2-Fix-full-preallocation-with-external-data-file.patch +Patch0015: qcow2-Fix-qcow2_make_empty-with-external-data-file.patch +Patch0016: megasas-fix-mapped-frame-size.patch +Patch0017: kbd-state-fix-autorepeat-handling.patch +Patch0018: block-file-posix-Unaligned-O_DIRECT-block-status.patch +Patch0019: hw-add-compat-machines-for-4.1.patch +Patch0020: q35-Revert-to-kernel-irqchip.patch +Patch0021: hw-Nuke-hw_compat_4_0_1-and-pc_compat_4_0_1.patch +Patch0022: vl-Fix-drive-blockdev-persistent-reservation-managem.patch +Patch0023: vhost-fix-vhost_log-size-overflow-during-migration.patch +Patch0024: virtio-pci-fix-missing-device-properties.patch +Patch0025: i386-acpi-fix-gint-overflow-in-crs_range_compare.patch +Patch0026: 
ioapic-kvm-Skip-route-updates-for-masked-pins.patch +Patch0027: i386-acpi-show-PCI-Express-bus-on-pxb-pcie-expanders.patch +Patch0028: virtio-balloon-Fix-wrong-sign-extension-of-PFNs.patch +Patch0029: virtio-balloon-Fix-QEMU-crashes-on-pagesize-BALLOON_.patch +Patch0030: virtio-balloon-Simplify-deflate-with-pbp.patch +Patch0031: virtio-balloon-Better-names-for-offset-variables.patch +Patch0032: virtio-balloon-Rework-pbp-tracking-data.patch +Patch0033: virtio-balloon-Use-temporary-PBP-only.patch +Patch0034: virtio-balloon-don-t-track-subpages-for-the-PBP.patch +Patch0035: virtio-balloon-free-pbp-more-aggressively.patch +Patch0036: qemu-bridge-helper-restrict-interface-name-to-IFNAMS.patch +Patch0037: qemu-bridge-helper-move-repeating-code-in-parse_acl.patch +Patch0038: smbios-Add-missing-member-of-type-4-for-smbios-3.0.patch +Patch0039: hw-arm-virt-Introduce-cpu-topology-support.patch +Patch0040: hw-arm64-add-vcpu-cache-info-support.patch +Patch0041: xhci-Fix-memory-leak-in-xhci_address_slot.patch +Patch0042: xhci-Fix-memory-leak-in-xhci_kick_epctx.patch +Patch0043: ehci-fix-queue-dev-null-ptr-dereference.patch +Patch0044: memory-unref-the-memory-region-in-simplify-flatview.patch +Patch0045: scsi-lsi-exit-infinite-loop-while-executing-script-C.patch +Patch0046: util-async-hold-AioContext-ref-to-prevent-use-after-.patch +Patch0047: vhost-user-scsi-prevent-using-uninitialized-vqs.patch +Patch0048: cpu-add-Kunpeng-T82-cpu-support.patch +Patch0049: cpu-parse-feature-to-avoid-failure.patch +Patch0050: cpu-add-Cortex-A72-processor-kvm-target-support.patch + +BuildRequires: flex +BuildRequires: bison +BuildRequires: texinfo +BuildRequires: perl-podlators +BuildRequires: kernel +BuildRequires: chrpath +BuildRequires: gettext + +BuildRequires: zlib-devel +BuildRequires: gtk3-devel +BuildRequires: gnutls-devel +BuildRequires: numactl-devel +BuildRequires: device-mapper-multipath-devel +BuildRequires: libfdt-devel +BuildRequires: rdma-core-devel +BuildRequires: libcap-devel 
+BuildRequires: libcap-ng-devel +BuildRequires: cyrus-sasl-devel +BuildRequires: libaio-devel +BuildRequires: virglrenderer-devel +BuildRequires: usbredir-devel >= 0.5.2 +BuildRequires: libseccomp-devel >= 2.3.0 +BuildRequires: systemd-devel +BuildRequires: libiscsi-devel +BuildRequires: snappy-devel +BuildRequires: lzo-devel +BuildRequires: ncurses-devel +BuildRequires: libattr-devel +BuildRequires: libcurl-devel +BuildRequires: libjpeg-devel +BuildRequires: libpng-devel +BuildRequires: brlapi-devel +BuildRequires: pixman-devel +BuildRequires: libusbx-devel +BuildRequires: bzip2-devel +BuildRequires: libepoxy-devel +BuildRequires: libtasn1-devel +BuildRequires: libxml2-devel +%ifarch x86_64 +BuildRequires: libpmem-devel +%endif +BuildRequires: libudev-devel +BuildRequires: pam-devel +BuildRequires: perl-Test-Harness +BuildRequires: python3-devel + +Requires(post): /usr/bin/getent +Requires(post): /usr/sbin/groupadd +Requires(post): /usr/sbin/useradd +Requires(post): systemd-units +Requires(preun): systemd-units +Requires(postun): systemd-units + +%description +QEMU is a FAST! processor emulator using dynamic translation to achieve good emulation speed. + +QEMU has two operating modes: + + Full system emulation. In this mode, QEMU emulates a full system (for example a PC), + including one or several processors and various peripherals. It can be used to launch + different Operating Systems without rebooting the PC or to debug system code. + + User mode emulation. In this mode, QEMU can launch processes compiled for one CPU on another CPU. + It can be used to launch the Wine Windows API emulator (https://www.winehq.org) or to ease + cross-compilation and cross-debugging. +You can refer to https://www.qemu.org for more information.
+ +%package guest-agent +Summary: QEMU guest agent +Requires(post): systemd-units +Requires(preun): systemd-units +Requires(postun): systemd-units +%description guest-agent +This package provides an agent to run inside guests, which communicates +with the host over a virtio-serial channel named "org.qemu.guest_agent.0" +Please refer to https://wiki.qemu.org/Features/GuestAgent for more information. + +%package help +Summary: Documents for qemu +Buildarch: noarch +%description help +This package provides documents for qemu related man help and information. + +%package img +Summary: QEMU command line tool for manipulating disk images +%description img +This package provides a command line tool for manipulating disk images + +%prep +%setup -q -n qemu-%{version}%{?rcstr} +%autopatch -p1 + +%build +%ifarch x86_64 +buildarch="x86_64-softmmu" +%endif +%ifarch aarch64 +buildarch="aarch64-softmmu" +%endif + +buildldflags="VL_LDFLAGS=-Wl,--build-id" + +./configure \ + --prefix=%{_prefix} \ + --target-list=${buildarch} \ + --extra-cflags="%{optflags} -fPIE -DPIE -fno-inline -fPIC" \ + --extra-ldflags="-Wl,--build-id -pie -Wl,-z,relro -Wl,-z,now -Wl,-z,noexecstack" \ + --datadir=%{_datadir} \ + --docdir=%{_docdir}/%{name} \ + --libdir=%{_libdir} \ + --libexecdir=%{_libexecdir} \ + --localstatedir=%{_localstatedir} \ + --sysconfdir=%{_sysconfdir} \ + --interp-prefix=%{_prefix}/qemu-%%M \ + --firmwarepath=%{_datadir}/%{name} \ + --with-pkgversion=%{name}-%{version}-%{release} \ + --disable-strip \ + --disable-werror \ + --disable-slirp \ + --enable-gtk \ + --enable-docs \ + --enable-guest-agent \ + --enable-pie \ + --enable-numa \ + --enable-mpath \ + --disable-libnfs \ + --disable-bzip2 \ + --enable-fdt \ + --enable-kvm \ + --enable-tcg \ + --enable-rdma \ + --enable-linux-aio \ + --enable-cap-ng \ + --enable-vhost-user \ + --enable-virglrenderer \ + --enable-cap-ng \ + --enable-libusb \ + --disable-bluez \ + --disable-dmg \ + --disable-qcow1 \ + --disable-vdi \ + 
--disable-vvfat \ + --disable-qed \ + --disable-parallels \ + --disable-sheepdog \ + --disable-capstone \ + --disable-smartcard + +make %{?_smp_mflags} $buildldflags V=1 + +cp -a ${buildarch}/qemu-system-* qemu-kvm + +%install + +make %{?_smp_mflags} DESTDIR=%{buildroot} \ + install + +%find_lang %{name} +install -m 0755 qemu-kvm %{buildroot}%{_libexecdir}/ +rm $RPM_BUILD_ROOT%{_bindir}/qemu-system-* +install -D -p -m 0644 contrib/systemd/qemu-pr-helper.service %{buildroot}%{_unitdir}/qemu-pr-helper.service +install -D -p -m 0644 contrib/systemd/qemu-pr-helper.socket %{buildroot}%{_unitdir}/qemu-pr-helper.socket +install -D -p -m 0644 qemu.sasl %{buildroot}%{_sysconfdir}/sasl2/qemu.conf +install -D -m 0644 %{_sourcedir}/bridge.conf %{buildroot}%{_sysconfdir}/qemu/bridge.conf + +# For qemu-guest-agent package +%global _udevdir /lib/udev/rules.d +install -D -p -m 0644 contrib/systemd/qemu-guest-agent.service %{buildroot}%{_unitdir}/qemu-guest-agent.service +install -D -m 0644 %{_sourcedir}/99-qemu-guest-agent.rules %{buildroot}%{_udevdir}/99-qemu-guest-agent.rules +mkdir -p %{buildroot}%{_localstatedir}/log +touch %{buildroot}%{_localstatedir}/log/qga-fsfreeze-hook.log + +%global qemudocdir %{_docdir}/%{name} +install -D -p -m 0644 -t %{buildroot}%{qemudocdir} Changelog README COPYING COPYING.LIB LICENSE +chmod -x %{buildroot}%{_mandir}/man1/* + + +%ifarch aarch64 +rm -rf %{buildroot}%{_datadir}/%{name}/vgabios*bin +rm -rf %{buildroot}%{_datadir}/%{name}/bios*.bin +rm -rf %{buildroot}%{_datadir}/%{name}/linuxboot.bin +rm -rf %{buildroot}%{_datadir}/%{name}/kvmvapic.bin +rm -rf %{buildroot}%{_datadir}/%{name}/sgabios.bin +rm -rf %{buildroot}%{_datadir}/%{name}/multiboot.bin +%endif +rm -rf %{buildroot}%{_datadir}/%{name}/openbios-* +rm -rf %{buildroot}%{_datadir}/%{name}/slof.bin +rm -rf %{buildroot}%{_datadir}/%{name}/QEMU,*.bin +rm -rf %{buildroot}%{_datadir}/%{name}/bamboo.dtb +rm -rf %{buildroot}%{_datadir}/%{name}/canyonlands.dtb +rm -rf 
%{buildroot}%{_datadir}/%{name}/hppa-firmware.img +rm -rf %{buildroot}%{_datadir}/%{name}/linuxboot_dma.bin +rm -rf %{buildroot}%{_datadir}/%{name}/palcode-clipper +rm -rf %{buildroot}%{_datadir}/%{name}/petalogix-* +rm -rf %{buildroot}%{_datadir}/%{name}/ppc_* +rm -rf %{buildroot}%{_datadir}/%{name}/pvh.bin +rm -rf %{buildroot}%{_datadir}/%{name}/qemu_vga.ndrv +rm -rf %{buildroot}%{_datadir}/%{name}/s390-* +rm -rf %{buildroot}%{_datadir}/%{name}/skiboot.lid +rm -rf %{buildroot}%{_datadir}/%{name}/spapr-* +rm -rf %{buildroot}%{_datadir}/%{name}/u-boot* +rm -rf %{buildroot}%{_bindir}/ivshmem* + +for f in %{buildroot}%{_bindir}/* %{buildroot}%{_libdir}/* \ + %{buildroot}%{_libexecdir}/*; do + if file $f | grep ELF | grep -q -i shared; then chrpath --delete $f; fi # only the last grep may use -q: a quiet grep prints nothing to pipe onward +done + +%check +make check V=1 + +%pre +getent group kvm >/dev/null || groupadd -g 36 -r kvm +getent group qemu >/dev/null || groupadd -g 107 -r qemu +getent passwd qemu >/dev/null || \ + useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ + -c "qemu user" qemu + +%post guest-agent +%systemd_post qemu-guest-agent.service +%preun guest-agent +%systemd_preun qemu-guest-agent.service +%postun guest-agent +%systemd_postun_with_restart qemu-guest-agent.service + +%files -f %{name}.lang +%dir %{_datadir}/%{name}/ +%{_libexecdir}/qemu-kvm +%{_datadir}/%{name}/efi-virtio.rom +%{_datadir}/%{name}/efi-e1000.rom +%{_datadir}/%{name}/efi-e1000e.rom +%{_datadir}/%{name}/efi-rtl8139.rom +%{_datadir}/%{name}/efi-pcnet.rom +%{_datadir}/%{name}/efi-ne2k_pci.rom +%{_datadir}/%{name}/efi-eepro100.rom +%{_datadir}/%{name}/efi-vmxnet3.rom +%{_datadir}/%{name}/pxe-virtio.rom +%{_datadir}/%{name}/pxe-e1000.rom +%{_datadir}/%{name}/pxe-ne2k_pci.rom +%{_datadir}/%{name}/pxe-pcnet.rom +%{_datadir}/%{name}/pxe-rtl8139.rom +%{_datadir}/%{name}/pxe-eepro100.rom +%{_datadir}/%{name}/trace-events-all +%{_datadir}/applications/qemu.desktop +%{_datadir}/icons/hicolor/*/apps/* +%{_datadir}/%{name}/keymaps/ +%{_bindir}/elf2dmp
+%{_bindir}/qemu-edid +%{_bindir}/qemu-keymap +%{_bindir}/qemu-pr-helper +%{_bindir}/virtfs-proxy-helper +%{_unitdir}/qemu-pr-helper.service +%{_unitdir}/qemu-pr-helper.socket +%attr(4755, root, root) %{_libexecdir}/qemu-bridge-helper +%config(noreplace) %{_sysconfdir}/sasl2/qemu.conf +%dir %{_sysconfdir}/qemu +%config(noreplace) %{_sysconfdir}/qemu/bridge.conf +%ifarch x86_64 +%{_datadir}/%{name}/bios.bin +%{_datadir}/%{name}/bios-256k.bin +%{_datadir}/%{name}/vgabios.bin +%{_datadir}/%{name}/vgabios-cirrus.bin +%{_datadir}/%{name}/vgabios-qxl.bin +%{_datadir}/%{name}/vgabios-stdvga.bin +%{_datadir}/%{name}/vgabios-vmware.bin +%{_datadir}/%{name}/vgabios-virtio.bin +%{_datadir}/%{name}/vgabios-ramfb.bin +%{_datadir}/%{name}/vgabios-bochs-display.bin +%{_datadir}/%{name}/linuxboot.bin +%{_datadir}/%{name}/multiboot.bin +%{_datadir}/%{name}/kvmvapic.bin +%{_datadir}/%{name}/sgabios.bin +%endif + +%files help +%dir %{qemudocdir} +%doc %{qemudocdir}/qemu-doc.html +%doc %{qemudocdir}/qemu-doc.txt +%doc %{qemudocdir}/qemu-ga-ref.html +%doc %{qemudocdir}/qemu-ga-ref.txt +%doc %{qemudocdir}/qemu-qmp-ref.html +%doc %{qemudocdir}/qemu-qmp-ref.txt +%doc %{qemudocdir}/README +%doc %{qemudocdir}/Changelog +%doc %{qemudocdir}/COPYING +%doc %{qemudocdir}/COPYING.LIB +%doc %{qemudocdir}/LICENSE +%{_mandir}/man1/qemu.1* +%{_mandir}/man1/virtfs-proxy-helper.1* +%{_mandir}/man7/qemu-block-drivers.7* +%{_mandir}/man7/qemu-cpu-models.7* +%{_mandir}/man7/qemu-ga-ref.7* +%{_mandir}/man7/qemu-qmp-ref.7* +%{_mandir}/man1/qemu-img.1* +%{_mandir}/man8/qemu-nbd.8* + +%files guest-agent +%defattr(-,root,root,-) +%{_bindir}/qemu-ga +%{_mandir}/man8/qemu-ga.8* +%{_unitdir}/qemu-guest-agent.service +%{_udevdir}/99-qemu-guest-agent.rules +%ghost %{_localstatedir}/log/qga-fsfreeze-hook.log + +%files img +%{_bindir}/qemu-img +%{_bindir}/qemu-io +%{_bindir}/qemu-nbd + + +%changelog +* Mon Sep 9 2019 backport from qemu upstream +- ehci-fix-queue-dev-null-ptr-dereference.patch +- 
memory-unref-the-memory-region-in-simplify-flatview.patch +- scsi-lsi-exit-infinite-loop-while-executing-script-C.patch +- util-async-hold-AioContext-ref-to-prevent-use-after-.patch +- vhost-user-scsi-prevent-using-uninitialized-vqs.patch + +* Fri Aug 30 2019 Huawei Technologies Co., Ltd. +- xhci: Fix memory leak in xhci_address_slot +- xhci: Fix memory leak in xhci_kick_epctx + +* Wed Aug 7 2019 Huawei Technologies Co., Ltd. +- hw/arm/virt: Introduce cpu topology support +- hw/arm64: add vcpu cache info support + +* Tue Aug 6 2019 Huawei Technologies Co., Ltd. +- Update release version to 4.0.0-2 + +* Mon Aug 5 2019 Huawei Technologies Co., Ltd. +- enable make check +- smbios: Add missing member of type 4 for smbios 3.0 + +* Mon Aug 5 2019 fix CVE-2019-13164 +- qemu-bridge-helper: restrict interface name to IFNAMSIZ +- qemu-bridge-helper: move repeating code in parse_acl_file + +* Wed Jul 31 2019 backport from qemu upstream +- block: Fix AioContext switch for bs->drv == NULL +- cutils: Fix size_to_str() on 32-bit platforms +- qcow2: Avoid COW during metadata preallocation +- qcow2: Add errp to preallocate_co() +- qcow2: qcow2: Fix full preallocation with external data file +- qcow2: Fix qcow2_make_empty() with external data file +- megasas: fix mapped frame size +- kbd-state: fix autorepeat handling +- block/file-posix: Unaligned O_DIRECT block-status +- hw: add compat machines for 4.1 +- q35: Revert to kernel irqchip +- hw: Nuke hw_compat_4_0_1 and pc_compat_4_0_1 +- vl: Fix -drive / -blockdev persistent reservation management +- vhost: fix vhost_log size overflow during migration +- virtio-pci: fix missing device properties +- i386/acpi: fix gint overflow in crs_range_compare +- ioapic: kvm: Skip route updates for masked pins +- i386/acpi: show PCI Express bus on pxb-pcie expanders +- virtio-balloon: Fix wrong sign extension of PFNs +- virtio-balloon: Fix QEMU crashes on pagesize > BALLOON_PAGE_SIZE +- virtio-balloon: Simplify deflate with pbp +- virtio-balloon: 
Better names for offset variables in inflate/deflate code +- virtio-balloon: Rework pbp tracking data +- virtio-balloon: Use temporary PBP only +- virtio-balloon: virtio-balloon: don't track subpages for the PBP +- virtio-balloon: free pbp more aggressively + +* Tue Jul 30 2019 Huawei Technologies Co., Ltd. +- vhost: cancel migration when vhost-user restarted +- pl031: support rtc-timer property for pl031 +- pl011: reset read FIFO when UARTTIMSC=0 & UARTICR=0xffff +- ARM64: record vtimer tick when cpu is stopped + +* Tue Jul 23 2019 openEuler Buildteam - version-release +- Package init diff --git a/qxl-check-release-info-object.patch b/qxl-check-release-info-object.patch new file mode 100644 index 0000000..aeddbe4 --- /dev/null +++ b/qxl-check-release-info-object.patch @@ -0,0 +1,36 @@ +From cbed4e0108ca1403f1f47cde292330b87a0d8bf2 Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Thu, 25 Apr 2019 12:05:34 +0530 +Subject: [PATCH] qxl: check release info object + +When releasing spice resources in release_resource() routine, +if release info object 'ext.info' is null, it leads to null +pointer dereference. Add check to avoid it. 
+ +(This is cherry-pick d52680fc932efb8a2f334cc6993e705ed1e31e99) + +Reported-by: Bugs SysSec +Signed-off-by: Prasad J Pandit +Message-id: 20190425063534.32747-1-ppandit@redhat.com +Signed-off-by: Gerd Hoffmann +--- + hw/display/qxl.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/display/qxl.c b/hw/display/qxl.c +index c8ce578..632923a 100644 +--- a/hw/display/qxl.c ++++ b/hw/display/qxl.c +@@ -777,6 +777,9 @@ static void interface_release_resource(QXLInstance *sin, + QXLReleaseRing *ring; + uint64_t *item, id; + ++ if (!ext.info) { ++ return; ++ } + if (ext.group_id == MEMSLOT_GROUP_HOST) { + /* host group -> vga mode update request */ + QXLCommandExt *cmdext = (void *)(intptr_t)(ext.info->id); +-- +1.8.3.1 + diff --git a/scsi-lsi-exit-infinite-loop-while-executing-script-C.patch b/scsi-lsi-exit-infinite-loop-while-executing-script-C.patch new file mode 100644 index 0000000..f78c286 --- /dev/null +++ b/scsi-lsi-exit-infinite-loop-while-executing-script-C.patch @@ -0,0 +1,104 @@ +From dee1c7242e6b11d502728840f36d021ceedce140 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 14 Aug 2019 17:35:21 +0530 +Subject: [PATCH 3/5] scsi: lsi: exit infinite loop while executing script + (CVE-2019-12068) + +When executing script in lsi_execute_script(), the LSI scsi adapter +emulator advances 's->dsp' index to read next opcode. This can lead +to an infinite loop if the next opcode is empty. Move the existing +loop exit after 10k iterations so that it covers no-op opcodes as +well. 
+ +Reported-by: Bugs SysSec +Signed-off-by: Paolo Bonzini +Signed-off-by: Prasad J Pandit +Signed-off-by: Paolo Bonzini +(cherry-picked from commit de594e47659029316bbf9391efb79da0a1a08e08) +--- + hw/scsi/lsi53c895a.c | 41 +++++++++++++++++++++++++++-------------- + 1 file changed, 27 insertions(+), 14 deletions(-) + +diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c +index da7239d..d3380b6 100644 +--- a/hw/scsi/lsi53c895a.c ++++ b/hw/scsi/lsi53c895a.c +@@ -184,6 +184,9 @@ static const char *names[] = { + /* Flag set if this is a tagged command. */ + #define LSI_TAG_VALID (1 << 16) + ++/* Maximum instructions to process. */ ++#define LSI_MAX_INSN 10000 ++ + typedef struct lsi_request { + SCSIRequest *req; + uint32_t tag; +@@ -1131,7 +1134,21 @@ static void lsi_execute_script(LSIState *s) + + s->istat1 |= LSI_ISTAT1_SRUN; + again: +- insn_processed++; ++ if (++insn_processed > LSI_MAX_INSN) { ++ /* Some windows drivers make the device spin waiting for a memory ++ location to change. If we have been executed a lot of code then ++ assume this is the case and force an unexpected device disconnect. ++ This is apparently sufficient to beat the drivers into submission. ++ */ ++ if (!(s->sien0 & LSI_SIST0_UDC)) { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "lsi_scsi: inf. loop with UDC masked"); ++ } ++ lsi_script_scsi_interrupt(s, LSI_SIST0_UDC, 0); ++ lsi_disconnect(s); ++ trace_lsi_execute_script_stop(); ++ return; ++ } + insn = read_dword(s, s->dsp); + if (!insn) { + /* If we receive an empty opcode increment the DSP by 4 bytes +@@ -1568,19 +1585,7 @@ again: + } + } + } +- if (insn_processed > 10000 && s->waiting == LSI_NOWAIT) { +- /* Some windows drivers make the device spin waiting for a memory +- location to change. If we have been executed a lot of code then +- assume this is the case and force an unexpected device disconnect. +- This is apparently sufficient to beat the drivers into submission. 
+- */ +- if (!(s->sien0 & LSI_SIST0_UDC)) { +- qemu_log_mask(LOG_GUEST_ERROR, +- "lsi_scsi: inf. loop with UDC masked"); +- } +- lsi_script_scsi_interrupt(s, LSI_SIST0_UDC, 0); +- lsi_disconnect(s); +- } else if (s->istat1 & LSI_ISTAT1_SRUN && s->waiting == LSI_NOWAIT) { ++ if (s->istat1 & LSI_ISTAT1_SRUN && s->waiting == LSI_NOWAIT) { + if (s->dcntl & LSI_DCNTL_SSM) { + lsi_script_dma_interrupt(s, LSI_DSTAT_SSI); + } else { +@@ -1968,6 +1973,10 @@ static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val) + case 0x2f: /* DSP[24:31] */ + s->dsp &= 0x00ffffff; + s->dsp |= val << 24; ++ /* ++ * FIXME: if s->waiting != LSI_NOWAIT, this will only execute one ++ * instruction. Is this correct? ++ */ + if ((s->dmode & LSI_DMODE_MAN) == 0 + && (s->istat1 & LSI_ISTAT1_SRUN) == 0) + lsi_execute_script(s); +@@ -1986,6 +1995,10 @@ static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val) + break; + case 0x3b: /* DCNTL */ + s->dcntl = val & ~(LSI_DCNTL_PFF | LSI_DCNTL_STD); ++ /* ++ * FIXME: if s->waiting != LSI_NOWAIT, this will only execute one ++ * instruction. Is this correct? ++ */ + if ((val & LSI_DCNTL_STD) && (s->istat1 & LSI_ISTAT1_SRUN) == 0) + lsi_execute_script(s); + break; +-- +1.8.3.1 + diff --git a/smbios-Add-missing-member-of-type-4-for-smbios-3.0.patch b/smbios-Add-missing-member-of-type-4-for-smbios-3.0.patch new file mode 100644 index 0000000..c45af3e --- /dev/null +++ b/smbios-Add-missing-member-of-type-4-for-smbios-3.0.patch @@ -0,0 +1,57 @@ +From e52fdbd850b49304c5bbd5f19c9f518b80efef42 Mon Sep 17 00:00:00 2001 +From: zhanghailiang +Date: Wed, 31 Jul 2019 15:40:55 +0800 +Subject: [PATCH] smbios: Add missing member of type 4 for smbios 3.0 + +According to smbios 3.0 spec, for processor information (type 4), +it adds three new members (Core Count 2, Core enabled 2, thread count 2) for 3.0, + +Without this three members, we can not get correct cpu frequency from dmi, +Because it will failed to check the length of Processor Infomation in DMI. 
+ +The corresponding codes in kernel is like: + if (dm->type == DMI_ENTRY_PROCESSOR && + dm->length >= DMI_ENTRY_PROCESSOR_MIN_LENGTH) { + u16 val = (u16)get_unaligned((const u16 *) + (dmi_data + DMI_PROCESSOR_MAX_SPEED)); + *mhz = val > *mhz ? val : *mhz; + } + +Signed-off-by: zhanghailiang +--- + hw/smbios/smbios.c | 4 +++- + include/hw/firmware/smbios.h | 3 +++ + 2 files changed, 6 insertions(+), 1 deletion(-) + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 47be9071..b11ec6e3 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -600,7 +600,9 @@ static void smbios_build_type_4_table(unsigned instance) + t->thread_count = smp_threads; + t->processor_characteristics = cpu_to_le16(0x02); /* Unknown */ + t->processor_family2 = cpu_to_le16(0x01); /* Other */ +- ++ t->corecount2 = 0; ++ t->enabledcorecount2 = 0; ++ t->threadcount2 = 0; + SMBIOS_BUILD_TABLE_POST; + smbios_type4_count++; + } +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index 6fef32a3..70eb7304 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -193,6 +193,9 @@ struct smbios_type_4 { + uint8_t thread_count; + uint16_t processor_characteristics; + uint16_t processor_family2; ++ uint16_t corecount2; ++ uint16_t enabledcorecount2; ++ uint16_t threadcount2; + } QEMU_PACKED; + + /* SMBIOS type 11 - OEM strings */ +-- +2.19.1 + diff --git a/target-i386-define-md-clear-bit.patch b/target-i386-define-md-clear-bit.patch new file mode 100644 index 0000000..57289e4 --- /dev/null +++ b/target-i386-define-md-clear-bit.patch @@ -0,0 +1,29 @@ +From: Paolo Bonzini +Date: Wed, 15 May 2019 15:10:10 +0100 +Subject: [PATCH] target/i386: define md-clear bit + +md-clear is a new CPUID bit which is set when microcode provides the +mechanism to invoke a flush of various exploitable CPU buffers by invoking +the VERW instruction. 
+ +Signed-off-by: Paolo Bonzini +Message-Id: <20190515141011.5315-2-berrange@redhat.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit b2ae52101fca7f9547ac2f388085dbc58f8fe1c0) +--- + target/i386/cpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index d6bb57d210..4ea78a4939 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1076,7 +1076,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .feat_names = { + NULL, NULL, "avx512-4vnniw", "avx512-4fmaps", + NULL, NULL, NULL, NULL, +- NULL, NULL, NULL, NULL, ++ NULL, NULL, "md-clear", NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, diff --git a/util-async-hold-AioContext-ref-to-prevent-use-after-.patch b/util-async-hold-AioContext-ref-to-prevent-use-after-.patch new file mode 100644 index 0000000..da4403c --- /dev/null +++ b/util-async-hold-AioContext-ref-to-prevent-use-after-.patch @@ -0,0 +1,63 @@ +From e965bc6c633921ab238b1f5ea64055975b24e2bb Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 23 Jul 2019 20:06:23 +0100 +Subject: [PATCH 4/5] util/async: hold AioContext ref to prevent use-after-free + +The tests/test-bdrv-drain /bdrv-drain/iothread/drain test case does the +following: + +1. The preadv coroutine calls aio_bh_schedule_oneshot() and then yields. +2. The one-shot BH executes in another AioContext. All it does is call + aio_co_wakeup(preadv_co). +3. The preadv coroutine is re-entered and returns. + +There is a race condition in aio_co_wake() where the preadv coroutine +returns and the test case destroys the preadv IOThread. aio_co_wake() +can still be running in the other AioContext and it performs an access +to the freed IOThread AioContext. + +Here is the race in aio_co_schedule(): + + QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines, + co, co_scheduled_next); + <-- race: co may execute before we invoke qemu_bh_schedule()! 
+ qemu_bh_schedule(ctx->co_schedule_bh); + +So if co causes ctx to be freed then we're in trouble. Fix this problem +by holding a reference to ctx. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Paolo Bonzini +Message-id: 20190723190623.21537-1-stefanha@redhat.com +Message-Id: <20190723190623.21537-1-stefanha@redhat.com> +Signed-off-by: Stefan Hajnoczi +(cherry-picked from commit f0f81002873c06fdef9bb2a272ddfd26af65b851) +--- + util/async.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/util/async.c b/util/async.c +index c10642a..afc17fb 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -460,9 +460,17 @@ void aio_co_schedule(AioContext *ctx, Coroutine *co) + abort(); + } + ++ /* The coroutine might run and release the last ctx reference before we ++ * invoke qemu_bh_schedule(). Take a reference to keep ctx alive until ++ * we're done. ++ */ ++ aio_context_ref(ctx); ++ + QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines, + co, co_scheduled_next); + qemu_bh_schedule(ctx->co_schedule_bh); ++ ++ aio_context_unref(ctx); + } + + void aio_co_wake(struct Coroutine *co) +-- +1.8.3.1 + diff --git a/vhost-cancel-migration-when-vhost-user-restarted.patch b/vhost-cancel-migration-when-vhost-user-restarted.patch new file mode 100644 index 0000000..3855775 --- /dev/null +++ b/vhost-cancel-migration-when-vhost-user-restarted.patch @@ -0,0 +1,60 @@ +From 750328e01afe4776eaddacde406063978dbf1291 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Mon, 29 Jul 2019 16:22:12 +0800 +Subject: [PATCH] vhost: cancel migration when vhost-user restarted during + migration + +Qemu will abort when vhost-user process is restarted during migration +when vhost_log_global_start/stop is called. The reason is clear that +vhost_dev_set_log returns -1 because network connection is temporarily +lost. Let's cancel migration and report it to user in this abnormal +situation.
+ +Signed-off-by: Ying Fang +Reviewed-by: Gonglei +--- + hw/virtio/vhost.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 7f61018f..f302c506 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -26,6 +26,7 @@ + #include "hw/virtio/virtio-bus.h" + #include "hw/virtio/virtio-access.h" + #include "migration/blocker.h" ++#include "migration/migration.h" + #include "sysemu/dma.h" + #include "trace.h" + +@@ -808,20 +809,24 @@ static int vhost_migration_log(MemoryListener *listener, int enable) + static void vhost_log_global_start(MemoryListener *listener) + { + int r; ++ Error *errp = NULL; + + r = vhost_migration_log(listener, true); + if (r < 0) { +- abort(); ++ error_setg(&errp, "Failed to start vhost migration log"); ++ migrate_fd_error(migrate_get_current(), errp); + } + } + + static void vhost_log_global_stop(MemoryListener *listener) + { + int r; ++ Error *errp = NULL; + + r = vhost_migration_log(listener, false); + if (r < 0) { +- abort(); ++ error_setg(&errp, "Failed to stop vhost migration log"); ++ migrate_fd_error(migrate_get_current(), errp); + } + } + +-- +2.19.1 + diff --git a/vhost-fix-vhost_log-size-overflow-during-migration.patch b/vhost-fix-vhost_log-size-overflow-during-migration.patch new file mode 100644 index 0000000..2111f2a --- /dev/null +++ b/vhost-fix-vhost_log-size-overflow-during-migration.patch @@ -0,0 +1,69 @@ +From 240e647a14df9677b3a501f7b8b870e40aac3fd5 Mon Sep 17 00:00:00 2001 +From: Li Hangjing +Date: Mon, 3 Jun 2019 14:15:24 +0800 +Subject: [PATCH] vhost: fix vhost_log size overflow during migration + +When a guest which doesn't support multiqueue is migrated with a multi queues +vhost-user-blk deivce, a crash will occur like: + +0 qemu_memfd_alloc (name=, size=562949953421312, seals=, fd=0x7f87171fe8b4, errp=0x7f87171fe8a8) at util/memfd.c:153 +1 0x00007f883559d7cf in vhost_log_alloc (size=70368744177664, share=true) at hw/virtio/vhost.c:186 +2 
0x00007f88355a0758 in vhost_log_get (listener=0x7f8838bd7940, enable=1) at qemu-2-12/hw/virtio/vhost.c:211 +3 vhost_dev_log_resize (listener=0x7f8838bd7940, enable=1) at hw/virtio/vhost.c:263 +4 vhost_migration_log (listener=0x7f8838bd7940, enable=1) at hw/virtio/vhost.c:787 +5 0x00007f88355463d6 in memory_global_dirty_log_start () at memory.c:2503 +6 0x00007f8835550577 in ram_init_bitmaps (f=0x7f88384ce600, opaque=0x7f8836024098) at migration/ram.c:2173 +7 ram_init_all (f=0x7f88384ce600, opaque=0x7f8836024098) at migration/ram.c:2192 +8 ram_save_setup (f=0x7f88384ce600, opaque=0x7f8836024098) at migration/ram.c:2219 +9 0x00007f88357a419d in qemu_savevm_state_setup (f=0x7f88384ce600) at migration/savevm.c:1002 +10 0x00007f883579fc3e in migration_thread (opaque=0x7f8837530400) at migration/migration.c:2382 +11 0x00007f8832447893 in start_thread () from /lib64/libpthread.so.0 +12 0x00007f8832178bfd in clone () from /lib64/libc.so.6 + +This is because vhost_get_log_size() returns a overflowed vhost-log size. +In this function, it uses the uninitialized variable vqs->used_phys and +vqs->used_size to get the vhost-log size. + +Signed-off-by: Li Hangjing +Reviewed-by: Xie Yongji +Reviewed-by: Chai Wen +Message-Id: <20190603061524.24076-1-lihangjing@baidu.com> +Cc: qemu-stable@nongnu.org +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry-pick from commit 240e647a14df9677b3a501f7b8b870e40aac3fd5) +--- + hw/virtio/vhost.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 60747a6f93..bc899fc60e 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -131,6 +131,11 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, + } + for (i = 0; i < dev->nvqs; ++i) { + struct vhost_virtqueue *vq = dev->vqs + i; ++ ++ if (!vq->used_phys && !vq->used_size) { ++ continue; ++ } ++ + vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys, + range_get_last(vq->used_phys, vq->used_size)); + } +@@ -168,6 +173,11 @@ static uint64_t vhost_get_log_size(struct vhost_dev *dev) + } + for (i = 0; i < dev->nvqs; ++i) { + struct vhost_virtqueue *vq = dev->vqs + i; ++ ++ if (!vq->used_phys && !vq->used_size) { ++ continue; ++ } ++ + uint64_t last = vq->used_phys + vq->used_size - 1; + log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1); + } +-- +2.19.1 + diff --git a/vhost-user-scsi-prevent-using-uninitialized-vqs.patch b/vhost-user-scsi-prevent-using-uninitialized-vqs.patch new file mode 100644 index 0000000..9c4f923 --- /dev/null +++ b/vhost-user-scsi-prevent-using-uninitialized-vqs.patch @@ -0,0 +1,43 @@ +From 19d56f560879081de411f359417eaaa2998c9e3a Mon Sep 17 00:00:00 2001 +From: Raphael Norwitz +Date: Tue, 11 Jun 2019 17:35:17 -0700 +Subject: [PATCH 5/5] vhost-user-scsi: prevent using uninitialized vqs + +Of the 3 virtqueues, seabios only sets cmd, leaving ctrl +and event without a physical address. This can cause +vhost_verify_ring_part_mapping to return ENOMEM, causing +the following logs: + +qemu-system-x86_64: Unable to map available ring for ring 0 +qemu-system-x86_64: Verify ring failure on region 0 + +The qemu commit e6cc11d64fc998c11a4dfcde8fda3fc33a74d844 +has already resolved the issue for vhost scsi devices but +the fix was never applied to vhost-user scsi devices. 
+ +Signed-off-by: Raphael Norwitz +Reviewed-by: Stefan Hajnoczi +Message-id: 1560299717-177734-1-git-send-email-raphael.norwitz@nutanix.com +Message-Id: <1560299717-177734-1-git-send-email-raphael.norwitz@nutanix.com> +Signed-off-by: Stefan Hajnoczi +(cherry-picked from commit 5d4c1ed3d46d7e2010b389fe5f3376f605182ab0) +--- + hw/scsi/vhost-user-scsi.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c +index 8b1e687..241631f 100644 +--- a/hw/scsi/vhost-user-scsi.c ++++ b/hw/scsi/vhost-user-scsi.c +@@ -90,7 +90,7 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) + } + + vsc->dev.nvqs = 2 + vs->conf.num_queues; +- vsc->dev.vqs = g_new(struct vhost_virtqueue, vsc->dev.nvqs); ++ vsc->dev.vqs = g_new0(struct vhost_virtqueue, vsc->dev.nvqs); + vsc->dev.vq_index = 0; + vsc->dev.backend_features = 0; + +-- +1.8.3.1 + diff --git a/virtio-balloon-Better-names-for-offset-variables.patch b/virtio-balloon-Better-names-for-offset-variables.patch new file mode 100644 index 0000000..7f9d3a9 --- /dev/null +++ b/virtio-balloon-Better-names-for-offset-variables.patch @@ -0,0 +1,113 @@ +From e6129b271b9dccca22c84870e313c315f2c70063 Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Mon, 22 Jul 2019 15:41:06 +0200 +Subject: [PATCH] virtio-balloon: Better names for offset variables in + inflate/deflate code + +"host_page_base" is really confusing, let's make this clearer, also +rename the other offsets to indicate to which base they apply. + +offset -> mr_offset +ram_offset -> rb_offset +host_page_base -> rb_aligned_offset + +While at it, use QEMU_ALIGN_DOWN() instead of a handcrafted computation +and move the computation to the place where it is needed. + +Acked-by: David Gibson +Signed-off-by: David Hildenbrand +Message-Id: <20190722134108.22151-5-david@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry-picked from commit e6129b271b9dccca22c84870e313c315f2c70063) +--- + hw/virtio/virtio-balloon.c | 26 +++++++++++++------------- + 1 file changed, 13 insertions(+), 13 deletions(-) + +diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c +index 04a7e6c772..f206cc8bf7 100644 +--- a/hw/virtio/virtio-balloon.c ++++ b/hw/virtio/virtio-balloon.c +@@ -41,24 +41,23 @@ struct PartiallyBalloonedPage { + }; + + static void balloon_inflate_page(VirtIOBalloon *balloon, +- MemoryRegion *mr, hwaddr offset) ++ MemoryRegion *mr, hwaddr mr_offset) + { +- void *addr = memory_region_get_ram_ptr(mr) + offset; ++ void *addr = memory_region_get_ram_ptr(mr) + mr_offset; ++ ram_addr_t rb_offset, rb_aligned_offset; + RAMBlock *rb; + size_t rb_page_size; + int subpages; +- ram_addr_t ram_offset, host_page_base; + + /* XXX is there a better way to get to the RAMBlock than via a + * host address? */ +- rb = qemu_ram_block_from_host(addr, false, &ram_offset); ++ rb = qemu_ram_block_from_host(addr, false, &rb_offset); + rb_page_size = qemu_ram_pagesize(rb); +- host_page_base = ram_offset & ~(rb_page_size - 1); + + if (rb_page_size == BALLOON_PAGE_SIZE) { + /* Easy case */ + +- ram_block_discard_range(rb, ram_offset, rb_page_size); ++ ram_block_discard_range(rb, rb_offset, rb_page_size); + /* We ignore errors from ram_block_discard_range(), because it + * has already reported them, and failing to discard a balloon + * page is not fatal */ +@@ -74,11 +73,12 @@ static void balloon_inflate_page(VirtIOBalloon *balloon, + warn_report_once( + "Balloon used with backing page size > 4kiB, this may not be reliable"); + ++ rb_aligned_offset = QEMU_ALIGN_DOWN(rb_offset, rb_page_size); + subpages = rb_page_size / BALLOON_PAGE_SIZE; + + if (balloon->pbp + && (rb != balloon->pbp->rb +- || host_page_base != balloon->pbp->base)) { ++ || rb_aligned_offset != balloon->pbp->base)) { + /* We've partially ballooned part of a host page, but now + * we're trying to balloon part of a 
different one. Too hard, + * give up on the old partial page */ +@@ -91,10 +91,10 @@ static void balloon_inflate_page(VirtIOBalloon *balloon, + size_t bitlen = BITS_TO_LONGS(subpages) * sizeof(unsigned long); + balloon->pbp = g_malloc0(sizeof(PartiallyBalloonedPage) + bitlen); + balloon->pbp->rb = rb; +- balloon->pbp->base = host_page_base; ++ balloon->pbp->base = rb_aligned_offset; + } + +- set_bit((ram_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE, ++ set_bit((rb_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE, + balloon->pbp->bitmap); + + if (bitmap_full(balloon->pbp->bitmap, subpages)) { +@@ -112,18 +112,18 @@ static void balloon_inflate_page(VirtIOBalloon *balloon, + } + + static void balloon_deflate_page(VirtIOBalloon *balloon, +- MemoryRegion *mr, hwaddr offset) ++ MemoryRegion *mr, hwaddr mr_offset) + { +- void *addr = memory_region_get_ram_ptr(mr) + offset; ++ void *addr = memory_region_get_ram_ptr(mr) + mr_offset; ++ ram_addr_t rb_offset; + RAMBlock *rb; + size_t rb_page_size; +- ram_addr_t ram_offset; + void *host_addr; + int ret; + + /* XXX is there a better way to get to the RAMBlock than via a + * host address? 
*/ +- rb = qemu_ram_block_from_host(addr, false, &ram_offset); ++ rb = qemu_ram_block_from_host(addr, false, &rb_offset); + rb_page_size = qemu_ram_pagesize(rb); + + if (balloon->pbp) { +-- +2.19.1 + diff --git a/virtio-balloon-Fix-QEMU-crashes-on-pagesize-BALLOON_.patch b/virtio-balloon-Fix-QEMU-crashes-on-pagesize-BALLOON_.patch new file mode 100644 index 0000000..7507b9c --- /dev/null +++ b/virtio-balloon-Fix-QEMU-crashes-on-pagesize-BALLOON_.patch @@ -0,0 +1,64 @@ +From 483f13524bb2a08b7ff6a7560b846564ed3b0c33 Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Mon, 22 Jul 2019 15:41:04 +0200 +Subject: [PATCH] virtio-balloon: Fix QEMU crashes on pagesize > + BALLOON_PAGE_SIZE + +We are using the wrong functions to set/clear bits, effectively touching +multiple bits, writing out of range of the bitmap, resulting in memory +corruptions. We have to use set_bit()/clear_bit() instead. + +Can easily be reproduced by starting a qemu guest on hugetlbfs memory, +inflating the balloon. QEMU crashes. This never could have worked +properly - especially, also pages would have been discarded when the +first sub-page would be inflated (the whole bitmap would be set). + +While testing I realized, that on hugetlbfs it is pretty much impossible +to discard a page - the guest just frees the 4k sub-pages in random order +most of the time. I was only able to discard a hugepage a handful of +times - so I hope that now works correctly. + +Fixes: ed48c59875b6 ("virtio-balloon: Safely handle BALLOON_PAGE_SIZE < host page size") +Fixes: b27b32391404 ("virtio-balloon: Fix possible guest memory corruption with inflates & deflates") +Cc: qemu-stable@nongnu.org #v4.0.0 +Acked-by: David Gibson +Signed-off-by: David Hildenbrand +Message-Id: <20190722134108.22151-3-david@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry-pick from commit 483f13524bb2a08b7ff6a7560b846564ed3b0c33) +--- + hw/virtio/virtio-balloon.c | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c +index 515abf6553..a78d2d2184 100644 +--- a/hw/virtio/virtio-balloon.c ++++ b/hw/virtio/virtio-balloon.c +@@ -94,9 +94,8 @@ static void balloon_inflate_page(VirtIOBalloon *balloon, + balloon->pbp->base = host_page_base; + } + +- bitmap_set(balloon->pbp->bitmap, +- (ram_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE, +- subpages); ++ set_bit((ram_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE, ++ balloon->pbp->bitmap); + + if (bitmap_full(balloon->pbp->bitmap, subpages)) { + /* We've accumulated a full host page, we can actually discard +@@ -140,9 +139,8 @@ static void balloon_deflate_page(VirtIOBalloon *balloon, + * for a guest to do this in practice, but handle it anyway, + * since getting it wrong could mean discarding memory the + * guest is still using. */ +- bitmap_clear(balloon->pbp->bitmap, +- (ram_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE, +- subpages); ++ clear_bit((ram_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE, ++ balloon->pbp->bitmap); + + if (bitmap_empty(balloon->pbp->bitmap, subpages)) { + g_free(balloon->pbp); +-- +2.19.1 + diff --git a/virtio-balloon-Fix-wrong-sign-extension-of-PFNs.patch b/virtio-balloon-Fix-wrong-sign-extension-of-PFNs.patch new file mode 100644 index 0000000..2549799 --- /dev/null +++ b/virtio-balloon-Fix-wrong-sign-extension-of-PFNs.patch @@ -0,0 +1,41 @@ +From ffa207d08253ffffb3993a1dbe09e40af4fc91f1 Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Mon, 22 Jul 2019 15:41:03 +0200 +Subject: [PATCH] virtio-balloon: Fix wrong sign extension of PFNs + +If we directly cast from int to uint64_t, we will first sign-extend to +an int64_t, which is wrong. We actually want to treat the PFNs like +unsigned values. 
+ +As far as I can see, this dates back to the initial virtio-balloon +commit, but wasn't triggered as fairly big guests would be required. + +Cc: qemu-stable@nongnu.org +Reported-by: Michael S. Tsirkin +Signed-off-by: David Hildenbrand +Message-Id: <20190722134108.22151-2-david@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: David Gibson +(cherry-picked from commit ffa207d08253ffffb3993a1dbe09e40af4fc91f1) +--- + hw/virtio/virtio-balloon.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c +index e85d1c0d5c..515abf6553 100644 +--- a/hw/virtio/virtio-balloon.c ++++ b/hw/virtio/virtio-balloon.c +@@ -343,8 +343,8 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) + } + + while (iov_to_buf(elem->out_sg, elem->out_num, offset, &pfn, 4) == 4) { ++ unsigned int p = virtio_ldl_p(vdev, &pfn); + hwaddr pa; +- int p = virtio_ldl_p(vdev, &pfn); + + pa = (hwaddr) p << VIRTIO_BALLOON_PFN_SHIFT; + offset += 4; +-- +2.19.1 + diff --git a/virtio-balloon-Rework-pbp-tracking-data.patch b/virtio-balloon-Rework-pbp-tracking-data.patch new file mode 100644 index 0000000..1930ac1 --- /dev/null +++ b/virtio-balloon-Rework-pbp-tracking-data.patch @@ -0,0 +1,144 @@ +From 1c5cfc2b7153dd72bf4b8ddc456408eb2b9b66d8 Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Mon, 22 Jul 2019 15:41:07 +0200 +Subject: [PATCH] virtio-balloon: Rework pbp tracking data + +Using the address of a RAMBlock to test for a matching pbp is not really +safe. Instead, let's use the guest physical address of the base page +along with the page size (via the number of subpages). + +Also, let's allocate the bitmap separately. This makes the code +easier to read and maintain - we can reuse bitmap_new(). + +Prepare the code to move the PBP out of the device. 
+ +Fixes: ed48c59875b6 ("virtio-balloon: Safely handle BALLOON_PAGE_SIZE < host page size") +Fixes: b27b32391404 ("virtio-balloon: Fix possible guest memory corruption with inflates & deflates") +Cc: qemu-stable@nongnu.org #v4.0.0 +Signed-off-by: David Hildenbrand +Message-Id: <20190722134108.22151-6-david@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry-picked from commit 1c5cfc2b7153dd72bf4b8ddc456408eb2b9b66d8) +--- + hw/virtio/virtio-balloon.c | 69 +++++++++++++++++++++++++------------- + 1 file changed, 46 insertions(+), 23 deletions(-) + +diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c +index f206cc8bf7..40d493a31a 100644 +--- a/hw/virtio/virtio-balloon.c ++++ b/hw/virtio/virtio-balloon.c +@@ -35,16 +35,44 @@ + #define BALLOON_PAGE_SIZE (1 << VIRTIO_BALLOON_PFN_SHIFT) + + struct PartiallyBalloonedPage { +- RAMBlock *rb; +- ram_addr_t base; +- unsigned long bitmap[]; ++ ram_addr_t base_gpa; ++ long subpages; ++ unsigned long *bitmap; + }; + ++static void virtio_balloon_pbp_free(PartiallyBalloonedPage *pbp) ++{ ++ if (!pbp) { ++ return; ++ } ++ g_free(pbp->bitmap); ++ g_free(pbp); ++} ++ ++static PartiallyBalloonedPage *virtio_balloon_pbp_alloc(ram_addr_t base_gpa, ++ long subpages) ++{ ++ PartiallyBalloonedPage *pbp = g_new0(PartiallyBalloonedPage, 1); ++ ++ pbp->base_gpa = base_gpa; ++ pbp->subpages = subpages; ++ pbp->bitmap = bitmap_new(subpages); ++ ++ return pbp; ++} ++ ++static bool virtio_balloon_pbp_matches(PartiallyBalloonedPage *pbp, ++ ram_addr_t base_gpa, long subpages) ++{ ++ return pbp->subpages == subpages && pbp->base_gpa == base_gpa; ++} ++ + static void balloon_inflate_page(VirtIOBalloon *balloon, + MemoryRegion *mr, hwaddr mr_offset) + { + void *addr = memory_region_get_ram_ptr(mr) + mr_offset; +- ram_addr_t rb_offset, rb_aligned_offset; ++ ram_addr_t rb_offset, rb_aligned_offset, base_gpa; ++ PartiallyBalloonedPage **pbp = &balloon->pbp; + RAMBlock *rb; + size_t rb_page_size; + 
int subpages; +@@ -75,39 +103,34 @@ static void balloon_inflate_page(VirtIOBalloon *balloon, + + rb_aligned_offset = QEMU_ALIGN_DOWN(rb_offset, rb_page_size); + subpages = rb_page_size / BALLOON_PAGE_SIZE; ++ base_gpa = memory_region_get_ram_addr(mr) + mr_offset - ++ (rb_offset - rb_aligned_offset); + +- if (balloon->pbp +- && (rb != balloon->pbp->rb +- || rb_aligned_offset != balloon->pbp->base)) { ++ if (*pbp && !virtio_balloon_pbp_matches(*pbp, base_gpa, subpages)) { + /* We've partially ballooned part of a host page, but now + * we're trying to balloon part of a different one. Too hard, + * give up on the old partial page */ +- g_free(balloon->pbp); +- balloon->pbp = NULL; ++ virtio_balloon_pbp_free(*pbp); ++ *pbp = NULL; + } + +- if (!balloon->pbp) { +- /* Starting on a new host page */ +- size_t bitlen = BITS_TO_LONGS(subpages) * sizeof(unsigned long); +- balloon->pbp = g_malloc0(sizeof(PartiallyBalloonedPage) + bitlen); +- balloon->pbp->rb = rb; +- balloon->pbp->base = rb_aligned_offset; ++ if (!*pbp) { ++ *pbp = virtio_balloon_pbp_alloc(base_gpa, subpages); + } + +- set_bit((rb_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE, +- balloon->pbp->bitmap); ++ set_bit((rb_offset - rb_aligned_offset) / BALLOON_PAGE_SIZE, ++ (*pbp)->bitmap); + +- if (bitmap_full(balloon->pbp->bitmap, subpages)) { ++ if (bitmap_full((*pbp)->bitmap, subpages)) { + /* We've accumulated a full host page, we can actually discard + * it now */ + +- ram_block_discard_range(rb, balloon->pbp->base, rb_page_size); ++ ram_block_discard_range(rb, rb_aligned_offset, rb_page_size); + /* We ignore errors from ram_block_discard_range(), because it + * has already reported them, and failing to discard a balloon + * page is not fatal */ +- +- g_free(balloon->pbp); +- balloon->pbp = NULL; ++ virtio_balloon_pbp_free(*pbp); ++ *pbp = NULL; + } + } + +@@ -128,7 +151,7 @@ static void balloon_deflate_page(VirtIOBalloon *balloon, + + if (balloon->pbp) { + /* Let's play safe and always reset the pbp on 
deflation requests. */ +- g_free(balloon->pbp); ++ virtio_balloon_pbp_free(balloon->pbp); + balloon->pbp = NULL; + } + +-- +2.19.1 + diff --git a/virtio-balloon-Simplify-deflate-with-pbp.patch b/virtio-balloon-Simplify-deflate-with-pbp.patch new file mode 100644 index 0000000..673ae1d --- /dev/null +++ b/virtio-balloon-Simplify-deflate-with-pbp.patch @@ -0,0 +1,67 @@ +From 2ffc49eea1bbd454913a88a0ad872c2649b36950 Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Mon, 22 Jul 2019 15:41:05 +0200 +Subject: [PATCH] virtio-balloon: Simplify deflate with pbp + +Let's simplify this - the case we are optimizing for is very hard to +trigger and not worth the effort. If we're switching from inflation to +deflation, let's reset the pbp. + +Acked-by: David Gibson +Signed-off-by: David Hildenbrand +Message-Id: <20190722134108.22151-4-david@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry-picked from commit 2ffc49eea1bbd454913a88a0ad872c2649b36950) +--- + hw/virtio/virtio-balloon.c | 26 +++++--------------------- + 1 file changed, 5 insertions(+), 21 deletions(-) + +diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c +index a78d2d2184..04a7e6c772 100644 +--- a/hw/virtio/virtio-balloon.c ++++ b/hw/virtio/virtio-balloon.c +@@ -117,7 +117,7 @@ static void balloon_deflate_page(VirtIOBalloon *balloon, + void *addr = memory_region_get_ram_ptr(mr) + offset; + RAMBlock *rb; + size_t rb_page_size; +- ram_addr_t ram_offset, host_page_base; ++ ram_addr_t ram_offset; + void *host_addr; + int ret; + +@@ -125,27 +125,11 @@ static void balloon_deflate_page(VirtIOBalloon *balloon, + * host address? 
*/ + rb = qemu_ram_block_from_host(addr, false, &ram_offset); + rb_page_size = qemu_ram_pagesize(rb); +- host_page_base = ram_offset & ~(rb_page_size - 1); +- +- if (balloon->pbp +- && rb == balloon->pbp->rb +- && host_page_base == balloon->pbp->base) { +- int subpages = rb_page_size / BALLOON_PAGE_SIZE; + +- /* +- * This means the guest has asked to discard some of the 4kiB +- * subpages of a host page, but then changed its mind and +- * asked to keep them after all. It's exceedingly unlikely +- * for a guest to do this in practice, but handle it anyway, +- * since getting it wrong could mean discarding memory the +- * guest is still using. */ +- clear_bit((ram_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE, +- balloon->pbp->bitmap); +- +- if (bitmap_empty(balloon->pbp->bitmap, subpages)) { +- g_free(balloon->pbp); +- balloon->pbp = NULL; +- } ++ if (balloon->pbp) { ++ /* Let's play safe and always reset the pbp on deflation requests. */ ++ g_free(balloon->pbp); ++ balloon->pbp = NULL; + } + + host_addr = (void *)((uintptr_t)addr & ~(rb_page_size - 1)); +-- +2.19.1 + diff --git a/virtio-balloon-Use-temporary-PBP-only.patch b/virtio-balloon-Use-temporary-PBP-only.patch new file mode 100644 index 0000000..9d1553b --- /dev/null +++ b/virtio-balloon-Use-temporary-PBP-only.patch @@ -0,0 +1,132 @@ +From a8cd64d488325f3be5c4ddec4bf07efb3b8c7330 Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Mon, 22 Jul 2019 15:41:08 +0200 +Subject: [PATCH] virtio-balloon: Use temporary PBP only + +We still have multiple issues in the current code +- The PBP is not freed during unrealize() +- The PBP is not reset on device resets: After a reset, the PBP is stale. +- We are not indicating VIRTIO_BALLOON_F_MUST_TELL_HOST, therefore + guests (esp. legacy guests) will reuse pages without deflating, + turning the PBP stale. Adding that would require compat handling. + +Instead, let's use the PBP only temporarily, when processing one bulk of +inflation requests. 
This will keep guest_page_size > 4k working (with +Linux guests). There is nothing to do for deflation requests anymore. +The pbp is only used for a limited amount of time. + +Fixes: ed48c59875b6 ("virtio-balloon: Safely handle BALLOON_PAGE_SIZE < host page size") +Cc: qemu-stable@nongnu.org #v4.0.0 +Suggested-by: Michael S. Tsirkin +Signed-off-by: David Hildenbrand +Message-Id: <20190722134108.22151-7-david@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Acked-by: David Gibson +(cherry-picked from commit a8cd64d488325f3be5c4ddec4bf07efb3b8c7330) +--- + hw/virtio/virtio-balloon.c | 21 +++++++++------------ + include/hw/virtio/virtio-balloon.h | 3 --- + 2 files changed, 9 insertions(+), 15 deletions(-) + +diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c +index ccd766d2..d4b4a05b 100644 +--- a/hw/virtio/virtio-balloon.c ++++ b/hw/virtio/virtio-balloon.c +@@ -34,11 +34,11 @@ + + #define BALLOON_PAGE_SIZE (1 << VIRTIO_BALLOON_PFN_SHIFT) + +-struct PartiallyBalloonedPage { ++typedef struct PartiallyBalloonedPage { + ram_addr_t base_gpa; + long subpages; + unsigned long *bitmap; +-}; ++} PartiallyBalloonedPage; + + static void virtio_balloon_pbp_free(PartiallyBalloonedPage *pbp) + { +@@ -68,11 +68,11 @@ static bool virtio_balloon_pbp_matches(PartiallyBalloonedPage *pbp, + } + + static void balloon_inflate_page(VirtIOBalloon *balloon, +- MemoryRegion *mr, hwaddr mr_offset) ++ MemoryRegion *mr, hwaddr mr_offset, ++ PartiallyBalloonedPage **pbp) + { + void *addr = memory_region_get_ram_ptr(mr) + mr_offset; + ram_addr_t rb_offset, rb_aligned_offset, base_gpa; +- PartiallyBalloonedPage **pbp = &balloon->pbp; + RAMBlock *rb; + size_t rb_page_size; + int subpages; +@@ -149,12 +149,6 @@ static void balloon_deflate_page(VirtIOBalloon *balloon, + rb = qemu_ram_block_from_host(addr, false, &rb_offset); + rb_page_size = qemu_ram_pagesize(rb); + +- if (balloon->pbp) { +- /* Let's play safe and always reset the pbp on deflation 
requests. */ +- virtio_balloon_pbp_free(balloon->pbp); +- balloon->pbp = NULL; +- } +- + host_addr = (void *)((uintptr_t)addr & ~(rb_page_size - 1)); + + /* When a page is deflated, we hint the whole host page it lives +@@ -336,6 +330,7 @@ static void balloon_stats_set_poll_interval(Object *obj, Visitor *v, + static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) + { + VirtIOBalloon *s = VIRTIO_BALLOON(vdev); ++ PartiallyBalloonedPage *pbp = NULL; + VirtQueueElement *elem; + MemoryRegionSection section; + +@@ -344,7 +339,7 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) + uint32_t pfn; + elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); + if (!elem) { +- return; ++ break; + } + + while (iov_to_buf(elem->out_sg, elem->out_num, offset, &pfn, 4) == 4) { +@@ -373,7 +368,7 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) + if (!qemu_balloon_is_inhibited()) { + if (vq == s->ivq) { + balloon_inflate_page(s, section.mr, +- section.offset_within_region); ++ section.offset_within_region, &pbp); + } else if (vq == s->dvq) { + balloon_deflate_page(s, section.mr, section.offset_within_region); + } else { +@@ -387,6 +382,8 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) + virtio_notify(vdev, vq); + g_free(elem); + } ++ ++ virtio_balloon_pbp_free(pbp); + } + + static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq) +diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h +index 1afafb12..503349a3 100644 +--- a/include/hw/virtio/virtio-balloon.h ++++ b/include/hw/virtio/virtio-balloon.h +@@ -33,8 +33,6 @@ typedef struct virtio_balloon_stat_modern { + uint64_t val; + } VirtIOBalloonStatModern; + +-typedef struct PartiallyBalloonedPage PartiallyBalloonedPage; +- + enum virtio_balloon_free_page_report_status { + FREE_PAGE_REPORT_S_STOP = 0, + FREE_PAGE_REPORT_S_REQUESTED = 1, +@@ -70,7 +68,6 @@ typedef struct VirtIOBalloon 
{ + int64_t stats_last_update; + int64_t stats_poll_interval; + uint32_t host_features; +- PartiallyBalloonedPage *pbp; + } VirtIOBalloon; + + #endif diff --git a/virtio-balloon-don-t-track-subpages-for-the-PBP.patch b/virtio-balloon-don-t-track-subpages-for-the-PBP.patch new file mode 100644 index 0000000..942eb76 --- /dev/null +++ b/virtio-balloon-don-t-track-subpages-for-the-PBP.patch @@ -0,0 +1,62 @@ +From 9a7ca8a7c920360db9dcaf616ca6f1440c025043 Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Thu, 25 Jul 2019 13:36:38 +0200 +Subject: [PATCH] virtio-balloon: don't track subpages for the PBP + +As ramblocks cannot get removed/readded while we are processing a bulk +of inflation requests, there is no more need to track the page size +in form of the number of subpages. + +Suggested-by: David Gibson +Signed-off-by: David Hildenbrand +Message-Id: <20190725113638.4702-8-david@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry-picked from commit 9a7ca8a7c920360db9dcaf616ca6f1440c025043) +--- + hw/virtio/virtio-balloon.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c +index a6282d58d4..fe9664e42c 100644 +--- a/hw/virtio/virtio-balloon.c ++++ b/hw/virtio/virtio-balloon.c +@@ -36,7 +36,6 @@ + + typedef struct PartiallyBalloonedPage { + ram_addr_t base_gpa; +- long subpages; + unsigned long *bitmap; + } PartiallyBalloonedPage; + +@@ -55,16 +54,15 @@ static PartiallyBalloonedPage *virtio_balloon_pbp_alloc(ram_addr_t base_gpa, + PartiallyBalloonedPage *pbp = g_new0(PartiallyBalloonedPage, 1); + + pbp->base_gpa = base_gpa; +- pbp->subpages = subpages; + pbp->bitmap = bitmap_new(subpages); + + return pbp; + } + + static bool virtio_balloon_pbp_matches(PartiallyBalloonedPage *pbp, +- ram_addr_t base_gpa, long subpages) ++ ram_addr_t base_gpa) + { +- return pbp->subpages == subpages && pbp->base_gpa == base_gpa; ++ return pbp->base_gpa == 
base_gpa; + } + + static void balloon_inflate_page(VirtIOBalloon *balloon, +@@ -106,7 +104,7 @@ static void balloon_inflate_page(VirtIOBalloon *balloon, + base_gpa = memory_region_get_ram_addr(mr) + mr_offset - + (rb_offset - rb_aligned_offset); + +- if (*pbp && !virtio_balloon_pbp_matches(*pbp, base_gpa, subpages)) { ++ if (*pbp && !virtio_balloon_pbp_matches(*pbp, base_gpa)) { + /* We've partially ballooned part of a host page, but now + * we're trying to balloon part of a different one. Too hard, + * give up on the old partial page */ +-- +2.19.1 + diff --git a/virtio-balloon-free-pbp-more-aggressively.patch b/virtio-balloon-free-pbp-more-aggressively.patch new file mode 100644 index 0000000..8ba4bcd --- /dev/null +++ b/virtio-balloon-free-pbp-more-aggressively.patch @@ -0,0 +1,130 @@ +From 1b47b37c33ec01ae1efc527f4c97f97f93723bc4 Mon Sep 17 00:00:00 2001 +From: "Michael S. Tsirkin" +Date: Thu, 25 Jul 2019 07:54:25 -0400 +Subject: [PATCH] virtio-balloon: free pbp more aggressively + +Previous patches switched to a temporary pbp but that does not go far +enough: after device uses a buffer, guest is free to reuse it, so +tracking the page and freeing it later is wrong. + +Free and reset the pbp after we push each element. + +Fixes: ed48c59875b6 ("virtio-balloon: Safely handle BALLOON_PAGE_SIZE < host page size") +Cc: qemu-stable@nongnu.org #v4.0.0 +Cc: David Hildenbrand +Signed-off-by: Michael S. 
Tsirkin +(cherry-picked from commit 1b47b37c33ec01ae1efc527f4c97f97f93723bc4) +--- + hw/virtio/virtio-balloon.c | 37 ++++++++++++++++--------------------- + 1 file changed, 16 insertions(+), 21 deletions(-) + +diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c +index fe9664e42c..25de154307 100644 +--- a/hw/virtio/virtio-balloon.c ++++ b/hw/virtio/virtio-balloon.c +@@ -41,22 +41,19 @@ typedef struct PartiallyBalloonedPage { + + static void virtio_balloon_pbp_free(PartiallyBalloonedPage *pbp) + { +- if (!pbp) { ++ if (!pbp->bitmap) { + return; + } + g_free(pbp->bitmap); +- g_free(pbp); ++ pbp->bitmap = NULL; + } + +-static PartiallyBalloonedPage *virtio_balloon_pbp_alloc(ram_addr_t base_gpa, +- long subpages) ++static void virtio_balloon_pbp_alloc(PartiallyBalloonedPage *pbp, ++ ram_addr_t base_gpa, ++ long subpages) + { +- PartiallyBalloonedPage *pbp = g_new0(PartiallyBalloonedPage, 1); +- + pbp->base_gpa = base_gpa; + pbp->bitmap = bitmap_new(subpages); +- +- return pbp; + } + + static bool virtio_balloon_pbp_matches(PartiallyBalloonedPage *pbp, +@@ -67,7 +64,7 @@ static bool virtio_balloon_pbp_matches(PartiallyBalloonedPage *pbp, + + static void balloon_inflate_page(VirtIOBalloon *balloon, + MemoryRegion *mr, hwaddr mr_offset, +- PartiallyBalloonedPage **pbp) ++ PartiallyBalloonedPage *pbp) + { + void *addr = memory_region_get_ram_ptr(mr) + mr_offset; + ram_addr_t rb_offset, rb_aligned_offset, base_gpa; +@@ -104,22 +101,21 @@ static void balloon_inflate_page(VirtIOBalloon *balloon, + base_gpa = memory_region_get_ram_addr(mr) + mr_offset - + (rb_offset - rb_aligned_offset); + +- if (*pbp && !virtio_balloon_pbp_matches(*pbp, base_gpa)) { ++ if (pbp->bitmap && !virtio_balloon_pbp_matches(pbp, base_gpa)) { + /* We've partially ballooned part of a host page, but now + * we're trying to balloon part of a different one. 
Too hard, + * give up on the old partial page */ +- virtio_balloon_pbp_free(*pbp); +- *pbp = NULL; ++ virtio_balloon_pbp_free(pbp); + } + +- if (!*pbp) { +- *pbp = virtio_balloon_pbp_alloc(base_gpa, subpages); ++ if (!pbp->bitmap) { ++ virtio_balloon_pbp_alloc(pbp, base_gpa, subpages); + } + + set_bit((rb_offset - rb_aligned_offset) / BALLOON_PAGE_SIZE, +- (*pbp)->bitmap); ++ pbp->bitmap); + +- if (bitmap_full((*pbp)->bitmap, subpages)) { ++ if (bitmap_full(pbp->bitmap, subpages)) { + /* We've accumulated a full host page, we can actually discard + * it now */ + +@@ -127,8 +123,7 @@ static void balloon_inflate_page(VirtIOBalloon *balloon, + /* We ignore errors from ram_block_discard_range(), because it + * has already reported them, and failing to discard a balloon + * page is not fatal */ +- virtio_balloon_pbp_free(*pbp); +- *pbp = NULL; ++ virtio_balloon_pbp_free(pbp); + } + } + +@@ -328,13 +323,14 @@ static void balloon_stats_set_poll_interval(Object *obj, Visitor *v, + static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) + { + VirtIOBalloon *s = VIRTIO_BALLOON(vdev); +- PartiallyBalloonedPage *pbp = NULL; + VirtQueueElement *elem; + MemoryRegionSection section; + + for (;;) { ++ PartiallyBalloonedPage pbp = {}; + size_t offset = 0; + uint32_t pfn; ++ + elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); + if (!elem) { + break; +@@ -379,9 +375,8 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) + virtqueue_push(vq, elem, offset); + virtio_notify(vdev, vq); + g_free(elem); ++ virtio_balloon_pbp_free(&pbp); + } +- +- virtio_balloon_pbp_free(pbp); + } + + static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq) +-- +2.19.1 + diff --git a/virtio-pci-fix-missing-device-properties.patch b/virtio-pci-fix-missing-device-properties.patch new file mode 100644 index 0000000..3281d04 --- /dev/null +++ b/virtio-pci-fix-missing-device-properties.patch @@ -0,0 +1,101 @@ +From 
b728ea5f2597f97088db8517576caa2877118cb0 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Mon, 5 Aug 2019 16:04:40 +0800 +Subject: [PATCH] virtio-pci: fix missing device properties +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Since commit a4ee4c8baa37154 ("virtio: Helper for registering virtio +device types"), virtio-gpu-pci, virtio-vga, and virtio-crypto-pci lost +some properties: "ioeventfd" and "vectors". This may cause various +issues, such as failing migration or invalid properties. + +Since those VirtioPCI devices do not have a base name, their class are +initialized with virtio_pci_generic_base_class_init(). However, if the +VirtioPCIDeviceTypeInfo provided a class_init which sets dc->props, +the properties were overwritten by virtio_pci_generic_class_init(). + +Instead, introduce an intermediary base-type to register the generic +properties. + +Fixes: a4ee4c8baa37154f42b4dc6a13fee79268d15238 +Cc: qemu-stable@nongnu.org +Signed-off-by: Marc-André Lureau +Message-Id: <20190625232333.30752-1-marcandre.lureau@redhat.com> +(cherry-pick from commit 683c1d89efd1eeb111c129a9a91f629b94d90d45) +--- + hw/virtio/virtio-pci.c | 28 ++++++++++++++-------------- + 1 file changed, 14 insertions(+), 14 deletions(-) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index cb44e19b..497092e8 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -1905,13 +1905,6 @@ static void virtio_pci_generic_class_init(ObjectClass *klass, void *data) + dc->props = virtio_pci_generic_properties; + } + +-/* Used when the generic type and the base type is the same */ +-static void virtio_pci_generic_base_class_init(ObjectClass *klass, void *data) +-{ +- virtio_pci_base_class_init(klass, data); +- virtio_pci_generic_class_init(klass, NULL); +-} +- + static void virtio_pci_transitional_instance_init(Object *obj) + { + VirtIOPCIProxy *proxy = VIRTIO_PCI(obj); +@@ -1930,14 +1923,13 @@
static void virtio_pci_non_transitional_instance_init(Object *obj) + + void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t) + { ++ char *base_name = NULL; + TypeInfo base_type_info = { + .name = t->base_name, + .parent = t->parent ? t->parent : TYPE_VIRTIO_PCI, + .instance_size = t->instance_size, + .instance_init = t->instance_init, + .class_size = t->class_size, +- .class_init = virtio_pci_base_class_init, +- .class_data = (void *)t, + .abstract = true, + }; + TypeInfo generic_type_info = { +@@ -1953,13 +1945,20 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t) + + if (!base_type_info.name) { + /* No base type -> register a single generic device type */ +- base_type_info.name = t->generic_name; +- base_type_info.class_init = virtio_pci_generic_base_class_init; +- base_type_info.interfaces = generic_type_info.interfaces; +- base_type_info.abstract = false; +- generic_type_info.name = NULL; ++ /* use intermediate %s-base-type to add generic device props */ ++ base_name = g_strdup_printf("%s-base-type", t->generic_name); ++ base_type_info.name = base_name; ++ base_type_info.class_init = virtio_pci_generic_class_init; ++ ++ generic_type_info.parent = base_name; ++ generic_type_info.class_init = virtio_pci_base_class_init; ++ generic_type_info.class_data = (void *)t; ++ + assert(!t->non_transitional_name); + assert(!t->transitional_name); ++ } else { ++ base_type_info.class_init = virtio_pci_base_class_init; ++ base_type_info.class_data = (void *)t; + } + + type_register(&base_type_info); +@@ -1997,6 +1996,7 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t) + }; + type_register(&transitional_type_info); + } ++ g_free(base_name); + } + + /* virtio-pci-bus */ +-- +2.19.1 + diff --git a/vl-Fix-drive-blockdev-persistent-reservation-managem.patch b/vl-Fix-drive-blockdev-persistent-reservation-managem.patch new file mode 100644 index 0000000..d05c300 --- /dev/null +++ 
b/vl-Fix-drive-blockdev-persistent-reservation-managem.patch @@ -0,0 +1,55 @@ +From 9ea18ed25a36527167e9676f25d983df5e7f76e6 Mon Sep 17 00:00:00 2001 +From: Markus Armbruster +Date: Tue, 4 Jun 2019 17:12:50 +0200 +Subject: [PATCH] vl: Fix -drive / -blockdev persistent reservation management + +qemu-system-FOO's main() acts on command line arguments in its own +idiosyncratic order. There's not much method to its madness. +Whenever we find a case where one kind of command line argument needs +to refer to something created for another kind later, we rejigger the +order. + +Recent commit cda4aa9a5a "vl: Create block backends before setting +machine properties" was such a rejigger. Block backends are now +created before "delayed" objects. This broke persistent reservation +management. Reproducer: + + $ qemu-system-x86_64 -object pr-manager-helper,id=pr-helper0,path=/tmp/pr-helper0.sock-drive -drive file=/dev/mapper/crypt,file.pr-manager=pr-helper0,format=raw,if=none,id=drive-scsi0-0-0-2 + qemu-system-x86_64: -drive file=/dev/mapper/crypt,file.pr-manager=pr-helper0,format=raw,if=none,id=drive-scsi0-0-0-2: No persistent reservation manager with id 'pr-helper0' + +The delayed pr-manager-helper object is created too late for use by +-drive or -blockdev. Normal objects are still created in time. + +pr-manager-helper has always been a delayed object (commit 7c9e527659 +"scsi, file-posix: add support for persistent reservation +management"). Turns out there's no real reason for that. Make it a +normal object. 
+ +Fixes: cda4aa9a5a08777cf13e164c0543bd4888b8adce +Signed-off-by: Markus Armbruster +Message-Id: <20190604151251.9903-2-armbru@redhat.com> +Reviewed-by: Michal Privoznik +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +(cherry-picked from commit 9ea18ed25a36527167e9676f25d983df5e7f76e6) +--- + vl.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/vl.c b/vl.c +index f023a8ca73..cc6246d2af 100644 +--- a/vl.c ++++ b/vl.c +@@ -2751,8 +2751,7 @@ static bool object_create_initial(const char *type, QemuOpts *opts) + exit(0); + } + +- if (g_str_equal(type, "rng-egd") || +- g_str_has_prefix(type, "pr-manager-")) { ++ if (g_str_equal(type, "rng-egd")) { + return false; + } + +-- +2.19.1 + diff --git a/xhci-Fix-memory-leak-in-xhci_address_slot.patch b/xhci-Fix-memory-leak-in-xhci_address_slot.patch new file mode 100644 index 0000000..1d0f858 --- /dev/null +++ b/xhci-Fix-memory-leak-in-xhci_address_slot.patch @@ -0,0 +1,47 @@ +From c0de0a04d03183f524c2f60cda8ae1e886197a7d Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Tue, 27 Aug 2019 10:54:48 +0800 +Subject: [PATCH] xhci: Fix memory leak in xhci_address_slot + +Address Sanitizer shows memory leak in xhci_address_slot +hw/usb/hcd-xhci.c:2156 and the stack is as below: + +Direct leak of 64 byte(s) in 4 object(s) allocated from: + #0 0xffff91c6f5ab in realloc (/lib64/libasan.so.4+0xd35ab) + #1 0xffff91987243 in g_realloc (/lib64/libglib-2.0.so.0+0x57243) + #2 0xaaaab0b26a1f in qemu_iovec_add util/iov.c:296 + #3 0xaaaab07e5ce3 in xhci_address_slot hw/usb/hcd-xhci.c:2156 + #4 0xaaaab07e5ce3 in xhci_process_commands hw/usb/hcd-xhci.c:2493 + #5 0xaaaab00058d7 in memory_region_write_accessor qemu/memory.c:507 + #6 0xaaaab0000d87 in access_with_adjusted_size memory.c:573 + #7 0xaaaab000abcf in memory_region_dispatch_write memory.c:1516 + #8 0xaaaaaff59947 in flatview_write_continue exec.c:3367 + #9 0xaaaaaff59c33 in flatview_write exec.c:3406 + #10 0xaaaaaff63b3b in address_space_write exec.c:3496 
+ #11 0xaaaab002f263 in kvm_cpu_exec accel/kvm/kvm-all.c:2288 + #12 0xaaaaaffee427 in qemu_kvm_cpu_thread_fn cpus.c:1290 + #13 0xaaaab0b1a943 in qemu_thread_start util/qemu-thread-posix.c:502 + #14 0xffff908ce8bb in start_thread (/lib64/libpthread.so.0+0x78bb) + #15 0xffff908165cb in thread_start (/lib64/libc.so.6+0xd55cb) + +Cc: zhanghailiang +Signed-off-by: Ying Fang +--- + hw/usb/hcd-xhci.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c +index f578264948..471759cd4c 100644 +--- a/hw/usb/hcd-xhci.c ++++ b/hw/usb/hcd-xhci.c +@@ -2161,6 +2161,7 @@ static TRBCCode xhci_address_slot(XHCIState *xhci, unsigned int slotid, + DeviceOutRequest | USB_REQ_SET_ADDRESS, + slotid, 0, 0, NULL); + assert(p.status != USB_RET_ASYNC); ++ usb_packet_cleanup(&p); + } + + res = xhci_enable_ep(xhci, slotid, 1, octx+32, ep0_ctx); +-- +2.19.1 + diff --git a/xhci-Fix-memory-leak-in-xhci_kick_epctx.patch b/xhci-Fix-memory-leak-in-xhci_kick_epctx.patch new file mode 100644 index 0000000..398b1bc --- /dev/null +++ b/xhci-Fix-memory-leak-in-xhci_kick_epctx.patch @@ -0,0 +1,44 @@ +From 2212f37e0e477d8da0cff02cfc8b7a921ca11bef Mon Sep 17 00:00:00 2001 +From: fangying +Date: Wed, 28 Aug 2019 14:02:22 +0800 +Subject: [PATCH] xhci: Fix memory leak in xhci_kick_epctx + +Address Sanitizer shows memory leak in xhci_kick_epctx hw/usb/hcd-xhci.c:1912. +A sglist is leaked when a packet is retried and returns USB_RET_NAK status. 
+The leak stack is as below: + +Direct leak of 2688 byte(s) in 168 object(s) allocated from: + #0 0xffffae8b11db in __interceptor_malloc (/lib64/libasan.so.4+0xd31db) + #1 0xffffae5c9163 in g_malloc (/lib64/libglib-2.0.so.0+0x57163) + #2 0xaaaabb6fb3f7 in qemu_sglist_init dma-helpers.c:43 + #3 0xaaaabba705a7 in pci_dma_sglist_init include/hw/pci/pci.h:837 + #4 0xaaaabba705a7 in xhci_xfer_create_sgl hw/usb/hcd-xhci.c:1443 + #5 0xaaaabba705a7 in xhci_setup_packet hw/usb/hcd-xhci.c:1615 + #6 0xaaaabba77a6f in xhci_kick_epctx hw/usb/hcd-xhci.c:1912 + #7 0xaaaabbdaad27 in timerlist_run_timers util/qemu-timer.c:592 + #8 0xaaaabbdab19f in qemu_clock_run_timers util/qemu-timer.c:606 + #9 0xaaaabbdab19f in qemu_clock_run_all_timers util/qemu-timer.c:692 + #10 0xaaaabbdab9a3 in main_loop_wait util/main-loop.c:524 + #11 0xaaaabb6ff5e7 in main_loop vl.c:1806 + #12 0xaaaabb1e1453 in main vl.c:4488 + +Signed-off-by: Ying Fang +--- + hw/usb/hcd-xhci.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c +index 6e1ec786..e10fbd3d 100644 +--- a/hw/usb/hcd-xhci.c ++++ b/hw/usb/hcd-xhci.c +@@ -1911,6 +1911,7 @@ static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) + } + usb_handle_packet(xfer->packet.ep->dev, &xfer->packet); + if (xfer->packet.status == USB_RET_NAK) { ++ xhci_xfer_unmap(xfer); + return; + } + xhci_try_complete_packet(xfer); +-- +2.19.1 +