skylark/cpu_qos-register-reset_domain_bandwidth-as-exit-func.patch
sundongxu e11c92af46 qos: Some bugfixes for power_qos/cachembw_qos/cpu_qos
cpu_qos: Register reset_domain_bandwidth as exit func
after adding power_qos job
power_qos/cachembw_qos: Add type check for environment
variables

Signed-off-by: sundongxu <sundongxu3@huawei.com>
2022-09-09 16:55:34 +08:00

114 lines
5.1 KiB
Diff

From a165c7131e09749401b01b3a7d568e96a9ca8b3a Mon Sep 17 00:00:00 2001
From: Dongxu Sun <sundongxu3@huawei.com>
Date: Sat, 3 Sep 2022 15:02:47 +0800
Subject: [PATCH 1/2] cpu_qos: Register reset_domain_bandwidth as exit func
after adding power_qos job
Currently, the domain bandwidth can be changed by
skylark only in power_qos job, so reset_domain_bandwidth
should be registered after adding power_qos job.
Besides, there is no need to reset domain bandwidth
when the domain cgroup path does not exist, since the
domain may have been stopped.
Signed-off-by: Dongxu Sun <sundongxu3@huawei.com>
---
qos_controller/cpucontroller.py | 21 ++++++++++-----------
skylark.py | 3 ++-
util.py | 5 +++--
3 files changed, 15 insertions(+), 14 deletions(-)
diff --git a/qos_controller/cpucontroller.py b/qos_controller/cpucontroller.py
index f2a67e0..26b1240 100644
--- a/qos_controller/cpucontroller.py
+++ b/qos_controller/cpucontroller.py
@@ -63,12 +63,12 @@ class CpuController:
quota_path = os.path.join(vm_slices_path, domain.cgroup_name, "cpu.cfs_quota_us")
try:
- util.file_write(quota_path, str(domain_quota_us))
+ util.file_write(quota_path, str(domain_quota_us), log=False)
except IOError as error:
- LOGGER.error("Failed to limit domain %s(%d) cpu bandwidth: %s"
- % (domain.domain_name, domain.domain_id, str(error)))
# If VM doesn't stop, raise exception.
if os.access(quota_path, os.F_OK):
+ LOGGER.error("Failed to limit domain %s(%d) cpu bandwidth: %s"
+ % (domain.domain_name, domain.domain_id, str(error)))
raise
else:
LOGGER.info("Domain %s(%d) cpu bandwidth was limitted to %s"
@@ -83,12 +83,12 @@ class CpuController:
quota_path = os.path.join(vm_slices_path, domain.cgroup_name, "cpu.cfs_quota_us")
try:
- util.file_write(quota_path, str(initial_bandwidth))
+ util.file_write(quota_path, str(initial_bandwidth), log=False)
except IOError as error:
- LOGGER.error("Failed to recovery domain %s(%d) cpu bandwidth: %s!"
- % (domain.domain_name, domain.domain_id, str(error)))
# If VM doesn't stop, raise exception.
if os.access(quota_path, os.F_OK):
+ LOGGER.error("Failed to recovery domain %s(%d) cpu bandwidth: %s!"
+ % (domain.domain_name, domain.domain_id, str(error)))
raise
else:
LOGGER.info("Domain %s(%d) cpu bandwidth was recoveried to %s"
@@ -101,13 +101,12 @@ class CpuController:
domain = guest_info.low_prio_vm_dict.get(domain_id)
initial_bandwidth = domain.global_quota_config
quota_path = os.path.join(vm_slices_path, domain.cgroup_name, "cpu.cfs_quota_us")
-
try:
- util.file_write(quota_path, str(initial_bandwidth))
+ util.file_write(quota_path, str(initial_bandwidth), log=False)
except IOError:
- LOGGER.error("Failed to reset domain %s(%d) cpu bandwidth to its initial bandwidth %s!"
- % (domain.domain_name, domain.domain_id, initial_bandwidth))
- # This is on exiting path, make no sense to raise exception.
+ if os.access(quota_path, os.F_OK):
+ LOGGER.error("Failed to reset domain %s(%d) cpu bandwidth to its initial bandwidth %s!"
+ % (domain.domain_name, domain.domain_id, initial_bandwidth))
else:
LOGGER.info("Domain %s(%d) cpu bandwidth was reset to %s"
% (domain.domain_name, domain.domain_id, initial_bandwidth))
diff --git a/skylark.py b/skylark.py
index 6224f9b..2ec9862 100644
--- a/skylark.py
+++ b/skylark.py
@@ -84,8 +84,9 @@ class QosManager:
def init_qos_controller(self):
self.cpu_controller.set_low_priority_cgroup()
+ if os.getenv("POWER_QOS_MANAGEMENT", "false").lower() == "true":
+ atexit.register(self.cpu_controller.reset_domain_bandwidth, self.data_collector.guest_info)
self.cachembw_controller.init_cachembw_controller(self.data_collector.host_info.resctrl_info)
- atexit.register(self.cpu_controller.reset_domain_bandwidth, self.data_collector.guest_info)
self.net_controller.init_net_controller()
def start_scheduler(self):
diff --git a/util.py b/util.py
index 70f6f5a..2b8c3db 100644
--- a/util.py
+++ b/util.py
@@ -31,13 +31,14 @@ def file_read(file_path):
raise
-def file_write(file_path, value):
+def file_write(file_path, value, log=True):
try:
with open(file_path, 'wb') as file:
file.truncate()
file.write(str.encode(value))
except FileNotFoundError as error:
- LOGGER.error(str(error))
+ if log:
+ LOGGER.error(str(error))
raise
--
2.17.1