skylark/guestinfo-Take-another-VM-stop-reason-to-account.patch
Keqian Zhu 52abc18f1c qos: More bugfixes for qos management
Take another VM stop reason into account, add an additional setting
for CPU QoS, and add a job to sync VM pids to resctrl.

Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
2022-08-12 18:28:14 +08:00

97 lines
3.9 KiB
Diff

From bdd805eec082062e042acda6caf38ca17dbaec50 Mon Sep 17 00:00:00 2001
From: Keqian Zhu <zhukeqian1@huawei.com>
Date: Thu, 4 Aug 2022 14:46:33 -0400
Subject: [PATCH 1/3] guestinfo: Take another VM stop reason to account
When a VM is shut down by OpenStack, the exception code is not
VIR_ERR_NO_DOMAIN, but the exception message contains "domain
is not running".
Also refactor the code to make it more readable.
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
---
data_collector/guestinfo.py | 48 +++++++++++++++++--------------------
1 file changed, 22 insertions(+), 26 deletions(-)
diff --git a/data_collector/guestinfo.py b/data_collector/guestinfo.py
index 415b3f6..38fa827 100644
--- a/data_collector/guestinfo.py
+++ b/data_collector/guestinfo.py
@@ -28,6 +28,7 @@ DEFAULT_PRIORITY = "machine"
HIGH_PRIORITY = "high_prio_machine"
LOW_PRIORITY = "low_prio_machine"
PIDS_CGRP_PATH = "/sys/fs/cgroup/pids"
+DOMAIN_STOP_MSG = "domain is not running"
class DomainInfo:
@@ -141,43 +142,38 @@ class GuestInfo:
self.running_domain_in_cpus.append([])
self.get_all_active_domain(conn)
-
for dom in self.domain_online:
self.vm_online_dict[dom.ID()] = dom
+ # Remove domains that were seen before but have now stopped
+ for vm_id in list(self.vm_dict):
+ if vm_id not in self.vm_online_dict:
+ del self.vm_dict[vm_id]
+
for vm_id in self.vm_online_dict:
- ret = -1
- if vm_id in self.vm_dict:
- try:
+ try:
+ if vm_id in self.vm_dict:
ret = self.vm_dict.get(vm_id).update_domain_info(self.vm_online_dict.get(vm_id), host_topo)
- except libvirt.libvirtError as e:
- if e.get_error_code() != libvirt.VIR_ERR_NO_DOMAIN:
- raise
- if ret < 0:
- del self.vm_dict[vm_id]
- continue
- else:
- try:
- vm_info = DomainInfo()
- ret = vm_info.set_domain_attribute(self.vm_online_dict.get(vm_id), host_topo)
- except libvirt.libvirtError as e:
- if e.get_error_code() != libvirt.VIR_ERR_NO_DOMAIN:
- raise
- if ret < 0:
- continue
- self.vm_dict[vm_id] = vm_info
+ else:
+ self.vm_dict[vm_id] = DomainInfo()
+ ret = self.vm_dict.get(vm_id).set_domain_attribute(self.vm_online_dict.get(vm_id), host_topo)
+ except libvirt.libvirtError as e:
+ ret = -1
+ # Re-raise unless the error indicates that the domain has stopped
+ if e.get_error_code() != libvirt.VIR_ERR_NO_DOMAIN and \
+ DOMAIN_STOP_MSG not in e.get_error_message():
+ raise
+ if ret < 0:
+ del self.vm_dict[vm_id]
+ continue
+ if self.vm_dict.get(vm_id).priority == 1:
+ self.low_prio_vm_dict[vm_id] = self.vm_dict.get(vm_id)
for cpu in range(host_topo.max_cpu_nums):
self.running_domain_in_cpus[cpu].append((self.vm_dict.get(vm_id).cpu_usage[cpu],
self.vm_dict.get(vm_id).domain_id,
self.vm_dict.get(vm_id).domain_name,
self.vm_dict.get(vm_id).priority))
- for vm_id in list(self.vm_dict):
- if vm_id not in self.vm_online_dict:
- del self.vm_dict[vm_id]
- elif vm_id not in self.low_prio_vm_dict and self.vm_dict.get(vm_id).priority == 1:
- self.low_prio_vm_dict[vm_id] = self.vm_dict.get(vm_id)
-
def get_all_active_domain(self, conn):
try:
self.domain_online = conn.listAllDomains(flags=libvirt.VIR_CONNECT_LIST_DOMAINS_ACTIVE)
--
2.33.0