diff --git a/VERSION-vendor b/VERSION-vendor index 482edf9..96e7372 100644 --- a/VERSION-vendor +++ b/VERSION-vendor @@ -1 +1 @@ -2.0.0-1 +2.0.0-2 diff --git a/git-commit b/git-commit index b9b4168..ffc71cf 100644 --- a/git-commit +++ b/git-commit @@ -1 +1 @@ -37e6484adbbb01802e969ccf640e5232d974b2fb +8eb2e8d8f046224de4cd37bb404ecc261668a6b3 diff --git a/patch/0001-Support-Labels-field-to-configure-QoSLevel.patch b/patch/0001-Support-Labels-field-to-configure-QoSLevel.patch new file mode 100644 index 0000000..4852c65 --- /dev/null +++ b/patch/0001-Support-Labels-field-to-configure-QoSLevel.patch @@ -0,0 +1,188 @@ +From b8e000527f7129242bd71f9c79697bef3a8b2111 Mon Sep 17 00:00:00 2001 +From: wujing +Date: Wed, 10 May 2023 19:26:36 +0800 +Subject: [PATCH 1/7] Support Labels field to configure QoSLevel + +Signed-off-by: wujing +--- + pkg/core/typedef/podinfo.go | 39 +++++++++++++++++++++++++-- + pkg/services/dyncache/dynamic.go | 3 +-- + pkg/services/dyncache/sync.go | 3 +-- + pkg/services/iocost/iocost.go | 2 +- + pkg/services/preemption/preemption.go | 13 +++------ + tests/try/pod.go | 1 + + 6 files changed, 44 insertions(+), 17 deletions(-) + +diff --git a/pkg/core/typedef/podinfo.go b/pkg/core/typedef/podinfo.go +index 907f02b..fd96848 100644 +--- a/pkg/core/typedef/podinfo.go ++++ b/pkg/core/typedef/podinfo.go +@@ -15,6 +15,7 @@ + package typedef + + import ( ++ "isula.org/rubik/pkg/common/constant" + "isula.org/rubik/pkg/core/typedef/cgroup" + ) + +@@ -26,6 +27,7 @@ type PodInfo struct { + Namespace string `json:"namespace"` + IDContainersMap map[string]*ContainerInfo `json:"containers,omitempty"` + Annotations map[string]string `json:"annotations,omitempty"` ++ Labels map[string]string `json:"labels,omitempty"` + } + + // NewPodInfo creates the PodInfo instance +@@ -37,6 +39,7 @@ func NewPodInfo(pod *RawPod) *PodInfo { + Hierarchy: cgroup.Hierarchy{Path: pod.CgroupPath()}, + IDContainersMap: pod.ExtractContainerInfos(), + Annotations: pod.DeepCopy().Annotations, ++ Labels: pod.DeepCopy().Labels, + } + } + +@@ -46,8 +49,9 @@ func (pod *PodInfo) DeepCopy() *PodInfo { + return nil + } + var ( +- contMap map[string]*ContainerInfo +- annoMap map[string]string ++ contMap map[string]*ContainerInfo ++ annoMap map[string]string ++ labelMap map[string]string + ) + // nil is different from empty value in golang + if pod.IDContainersMap != nil { +@@ -56,6 +60,7 @@ func (pod *PodInfo) DeepCopy() *PodInfo { + contMap[id] = cont.DeepCopy() + } + } ++ + if pod.Annotations != nil { + annoMap = make(map[string]string) + for k, v := range pod.Annotations { +@@ -63,12 +68,42 @@ func (pod *PodInfo) DeepCopy() *PodInfo { + } + } + ++ if pod.Labels != nil { ++ labelMap = make(map[string]string) ++ for k, v := range pod.Labels { ++ labelMap[k] = v ++ } ++ } ++ + return &PodInfo{ + Name: pod.Name, + UID: pod.UID, + Hierarchy: pod.Hierarchy, + Namespace: pod.Namespace, + Annotations: annoMap, ++ Labels: labelMap, + IDContainersMap: contMap, + } + } ++ ++// Offline is used to determine whether the pod is offline ++func (pod *PodInfo) Offline() bool { ++ var anno string ++ var label string ++ ++ if pod.Annotations != nil { ++ anno = pod.Annotations[constant.PriorityAnnotationKey] ++ } ++ ++ if pod.Labels != nil { ++ label = pod.Labels[constant.PriorityAnnotationKey] ++ } ++ ++ // Annotations have a higher priority than labels ++ return anno == "true" || label == "true" ++} ++ ++// Online is used to determine whether the pod is online ++func (pod *PodInfo) Online() bool { ++ return !pod.Offline() ++} +diff --git 
a/pkg/services/dyncache/dynamic.go b/pkg/services/dyncache/dynamic.go +index 09bde4c..d74efc7 100644 +--- a/pkg/services/dyncache/dynamic.go ++++ b/pkg/services/dyncache/dynamic.go +@@ -124,8 +124,7 @@ func (c *DynCache) doFlush(limitSet *limitSet) error { + } + + func (c *DynCache) listOnlinePods() map[string]*typedef.PodInfo { +- onlineValue := "false" + return c.Viewer.ListPodsWithOptions(func(pi *typedef.PodInfo) bool { +- return pi.Annotations[constant.PriorityAnnotationKey] == onlineValue ++ return pi.Online() + }) + } +diff --git a/pkg/services/dyncache/sync.go b/pkg/services/dyncache/sync.go +index 8307c41..bf59cd4 100644 +--- a/pkg/services/dyncache/sync.go ++++ b/pkg/services/dyncache/sync.go +@@ -111,8 +111,7 @@ func (c *DynCache) syncLevel(pod *typedef.PodInfo) error { + } + + func (c *DynCache) listOfflinePods() map[string]*typedef.PodInfo { +- offlineValue := "true" + return c.Viewer.ListPodsWithOptions(func(pi *typedef.PodInfo) bool { +- return pi.Annotations[constant.PriorityAnnotationKey] == offlineValue ++ return pi.Offline() + }) + } +diff --git a/pkg/services/iocost/iocost.go b/pkg/services/iocost/iocost.go +index e5298b1..c11ef60 100644 +--- a/pkg/services/iocost/iocost.go ++++ b/pkg/services/iocost/iocost.go +@@ -236,7 +236,7 @@ func (b *IOCost) clearIOCost() error { + + func (b *IOCost) configPodIOCostWeight(podInfo *typedef.PodInfo) error { + var weight uint64 = offlineWeight +- if podInfo.Annotations[constant.PriorityAnnotationKey] == "false" { ++ if podInfo.Online() { + weight = onlineWeight + } + for _, container := range podInfo.IDContainersMap { +diff --git a/pkg/services/preemption/preemption.go b/pkg/services/preemption/preemption.go +index ce436a3..28ec36e 100644 +--- a/pkg/services/preemption/preemption.go ++++ b/pkg/services/preemption/preemption.go +@@ -160,18 +160,11 @@ func getQoSLevel(pod *typedef.PodInfo) int { + if pod == nil { + return constant.Online + } +- anno, ok := pod.Annotations[constant.PriorityAnnotationKey] +- if !ok { +- return constant.Online +- } +- switch anno { +- case "true": ++ if pod.Offline() { + return constant.Offline +- case "false": +- return constant.Online +- default: +- return constant.Online + } ++ ++ return constant.Online + } + + // Validate will validate the qos service config +diff --git a/tests/try/pod.go b/tests/try/pod.go +index 18cb0ec..8053c4b 100644 +--- a/tests/try/pod.go ++++ b/tests/try/pod.go +@@ -60,6 +60,7 @@ func GenFakePodInfo(qosClass corev1.PodQOSClass) *typedef.PodInfo { + UID: constant.PodCgroupNamePrefix + podID, + Hierarchy: cgroup.Hierarchy{Path: genRelativeCgroupPath(qosClass, podID)}, + Annotations: make(map[string]string, 0), ++ Labels: make(map[string]string, 0), + } + return fakePod + } +-- +2.32.1 (Apple Git-133) + diff --git a/patch/0002-rubik-fix-weight-for-iocost-does-not-take-effect.patch b/patch/0002-rubik-fix-weight-for-iocost-does-not-take-effect.patch new file mode 100644 index 0000000..6548d27 --- /dev/null +++ b/patch/0002-rubik-fix-weight-for-iocost-does-not-take-effect.patch @@ -0,0 +1,69 @@ +From 804ff7873331cf745bc49aab5f5d2857ec1597c6 Mon Sep 17 00:00:00 2001 +From: hanchao +Date: Mon, 5 Jun 2023 13:56:01 +0800 +Subject: [PATCH 2/7] rubik: fix weight for iocost does not take effect + +reason: Fix weight for iocost does not take effect. +The iocost weight is at pod level, not container +level. 
+--- + pkg/services/iocost/iocost.go | 7 +------ + pkg/services/iocost/iocost_origin.go | 8 ++++---- + pkg/services/iocost/iocost_test.go | 2 +- + 3 files changed, 6 insertions(+), 11 deletions(-) + +diff --git a/pkg/services/iocost/iocost.go b/pkg/services/iocost/iocost.go +index e5298b1..34f508a 100644 +--- a/pkg/services/iocost/iocost.go ++++ b/pkg/services/iocost/iocost.go +@@ -239,10 +239,5 @@ func (b *IOCost) configPodIOCostWeight(podInfo *typedef.PodInfo) error { + if podInfo.Annotations[constant.PriorityAnnotationKey] == "false" { + weight = onlineWeight + } +- for _, container := range podInfo.IDContainersMap { +- if err := ConfigContainerIOCostWeight(container.Path, weight); err != nil { +- return err +- } +- } +- return nil ++ return ConfigPodIOCostWeight(podInfo.Path, weight) + } +diff --git a/pkg/services/iocost/iocost_origin.go b/pkg/services/iocost/iocost_origin.go +index d37109f..5e9948f 100644 +--- a/pkg/services/iocost/iocost_origin.go ++++ b/pkg/services/iocost/iocost_origin.go +@@ -63,14 +63,14 @@ func ConfigIOCostModel(devno string, p interface{}) error { + return cgroup.WriteCgroupFile(paramStr, blkcgRootDir, iocostModelFile) + } + +-// ConfigContainerIOCostWeight for config iocost weight ++// ConfigPodIOCostWeight for config iocost weight + // cgroup v1 iocost cannot be inherited. Therefore, only the container level can be configured. +-func ConfigContainerIOCostWeight(containerRelativePath string, weight uint64) error { ++func ConfigPodIOCostWeight(relativePath string, weight uint64) error { + if err := cgroup.WriteCgroupFile(strconv.FormatUint(weight, scale), blkcgRootDir, +- containerRelativePath, iocostWeightFile); err != nil { ++ relativePath, iocostWeightFile); err != nil { + return err + } +- if err := bindMemcgBlkcg(containerRelativePath); err != nil { ++ if err := bindMemcgBlkcg(relativePath); err != nil { + return err + } + return nil +diff --git a/pkg/services/iocost/iocost_test.go b/pkg/services/iocost/iocost_test.go +index 95b6d97..3bdadad 100644 +--- a/pkg/services/iocost/iocost_test.go ++++ b/pkg/services/iocost/iocost_test.go +@@ -334,7 +334,7 @@ func TestSetPodWeight(t *testing.T) { + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { +- err := ConfigContainerIOCostWeight(tt.cgroupPath, uint64(tt.weight)) ++ err := ConfigPodIOCostWeight(tt.cgroupPath, uint64(tt.weight)) + if tt.wantErr { + assert.Contains(t, err.Error(), tt.errMsg) + return +-- +2.32.1 (Apple Git-133) + diff --git a/patch/0003-rubik-test-coverage-for-PSI-Manager.patch b/patch/0003-rubik-test-coverage-for-PSI-Manager.patch new file mode 100644 index 0000000..42501cd --- /dev/null +++ b/patch/0003-rubik-test-coverage-for-PSI-Manager.patch @@ -0,0 +1,264 @@ +From e7c89f1935d117a2778339cc4774393331002254 Mon Sep 17 00:00:00 2001 +From: jingxiaolu +Date: Mon, 12 Jun 2023 23:12:37 +0800 +Subject: [PATCH 3/7] rubik: test coverage for PSI Manager + +Adding test cases for PSI Manager + +Signed-off-by: jingxiaolu +--- + Makefile | 3 + + pkg/config/config_test.go | 29 ++++++++ + pkg/services/psi/psi.go | 14 ++-- + pkg/services/psi/psi_test.go | 126 +++++++++++++++++++++++++++++++++++ + pkg/services/service_test.go | 4 ++ + 5 files changed, 169 insertions(+), 7 deletions(-) + create mode 100644 pkg/services/psi/psi_test.go + +diff --git a/Makefile b/Makefile +index 7a92d12..bd66147 100644 +--- a/Makefile ++++ b/Makefile +@@ -54,6 +54,7 @@ help: + @echo "make test-unit # run unit test" + @echo "make cover # generate coverage report" + @echo "make install # install files to 
/var/lib/rubik" ++ @echo "make clean" # clean built files and test logs + + prepare: + mkdir -p $(TMP_DIR) $(BUILD_DIR) +@@ -101,3 +102,5 @@ install: + cp -f $(BUILD_DIR)/* $(INSTALL_DIR) + cp -f $(BUILD_DIR)/rubik.service /lib/systemd/system/ + ++clean: ++ rm -rf build/* cover.* unit_test_log +diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go +index dbbd2e4..03ff4ca 100644 +--- a/pkg/config/config_test.go ++++ b/pkg/config/config_test.go +@@ -53,6 +53,35 @@ var rubikConfig string = ` + "mid": 30, + "high": 50 + } ++ }, ++ "ioCost": [ ++ { ++ "nodeName": "k8s-single", ++ "config": [ ++ { ++ "dev": "sdb", ++ "enable": true, ++ "model": "linear", ++ "param": { ++ "rbps": 10000000, ++ "rseqiops": 10000000, ++ "rrandiops": 10000000, ++ "wbps": 10000000, ++ "wseqiops": 10000000, ++ "wrandiops": 10000000 ++ } ++ } ++ ] ++ } ++ ], ++ "psi": { ++ "interval": 10, ++ "resource": [ ++ "cpu", ++ "memory", ++ "io" ++ ], ++ "avg10Threshold": 5.0 + } + } + ` +diff --git a/pkg/services/psi/psi.go b/pkg/services/psi/psi.go +index 1c70255..a55922e 100644 +--- a/pkg/services/psi/psi.go ++++ b/pkg/services/psi/psi.go +@@ -37,19 +37,19 @@ const ( + minThreshold float64 = 5.0 + ) + +-// Factory is the QuotaTurbo factory class ++// Factory is the PSI Manager factory class + type Factory struct { + ObjName string + } + + // Name returns the factory class name +-func (i Factory) Name() string { +- return "Factory" ++func (f Factory) Name() string { ++ return "PSIFactory" + } + +-// NewObj returns a QuotaTurbo object +-func (i Factory) NewObj() (interface{}, error) { +- return NewManager(i.ObjName), nil ++// NewObj returns a Manager object ++func (f Factory) NewObj() (interface{}, error) { ++ return NewManager(f.ObjName), nil + } + + // Config is PSI service configuration +@@ -130,7 +130,7 @@ func (m *Manager) SetConfig(f helper.ConfigHandler) error { + } + + // IsRunner returns true that tells other Manager is a persistent service +-func (qt *Manager) IsRunner() bool { ++func (m *Manager) IsRunner() bool { + return true + } + +diff --git a/pkg/services/psi/psi_test.go b/pkg/services/psi/psi_test.go +new file mode 100644 +index 0000000..2036aa1 +--- /dev/null ++++ b/pkg/services/psi/psi_test.go +@@ -0,0 +1,126 @@ ++// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. ++// rubik licensed under the Mulan PSL v2. ++// You can use this software according to the terms and conditions of the Mulan PSL v2. ++// You may obtain a copy of Mulan PSL v2 at: ++// http://license.coscl.org.cn/MulanPSL2 ++// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR ++// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR ++// PURPOSE. ++// See the Mulan PSL v2 for more details. 
++// Author: Jingxiao Lu ++// Date: 2023-06-12 ++// Description: This file is used for testing psi.go ++ ++package psi ++ ++import ( ++ "context" ++ "fmt" ++ "testing" ++ "time" ++ ++ "isula.org/rubik/pkg/api" ++ "isula.org/rubik/pkg/core/typedef" ++) ++ ++// TestNewManagerObj tests NewObj() for Factory ++func TestNewManagerObj(t *testing.T) { ++ var fact = Factory{ ++ ObjName: "psi", ++ } ++ nm, err := fact.NewObj() ++ if err != nil { ++ t.Fatalf("New PSI Manager failed: %v", err) ++ return ++ } ++ fmt.Printf("New PSI Manager %s is %#v", fact.Name(), nm) ++} ++ ++// TestConfigValidate tests Config Validate ++func TestConfigValidate(t *testing.T) { ++ var tests = []struct { ++ name string ++ conf *Config ++ wantErr bool ++ }{ ++ { ++ name: "TC1 - Default Config", ++ conf: NewConfig(), ++ wantErr: true, ++ }, ++ { ++ name: "TC2 - Wrong Interval value", ++ conf: &Config{ ++ Interval: minInterval - 1, ++ }, ++ wantErr: true, ++ }, ++ { ++ name: "TC3 - Wrong Threshold value", ++ conf: &Config{ ++ Interval: minInterval, ++ Avg10Threshold: minThreshold - 1, ++ }, ++ wantErr: true, ++ }, ++ { ++ name: "TC4 - No resource type specified", ++ conf: &Config{ ++ Interval: minInterval, ++ Avg10Threshold: minThreshold, ++ }, ++ wantErr: true, ++ }, ++ { ++ name: "TC5 - Wrong resource type cpuacct - cpuacct is for psi subsystem, not for resource type", ++ conf: &Config{ ++ Interval: minInterval, ++ Avg10Threshold: minThreshold, ++ Resource: []string{"cpu", "memory", "io", "cpuacct"}, ++ }, ++ wantErr: true, ++ }, ++ { ++ name: "TC6 - Success case - trully end", ++ conf: &Config{ ++ Interval: minInterval, ++ Avg10Threshold: minThreshold, ++ Resource: []string{"cpu", "memory", "io"}, ++ }, ++ wantErr: false, ++ }, ++ } ++ for _, tc := range tests { ++ t.Run(tc.name, func(t *testing.T) { ++ if err := tc.conf.Validate(); (err != nil) != tc.wantErr { ++ t.Errorf("Config.Validate() error = %v, wantErr %v", err, tc.wantErr) ++ } ++ }) ++ } ++} ++ ++type FakeManager struct{} ++ ++func (m *FakeManager) ListContainersWithOptions(options ...api.ListOption) map[string]*typedef.ContainerInfo { ++ return make(map[string]*typedef.ContainerInfo) ++} ++func (m *FakeManager) ListPodsWithOptions(options ...api.ListOption) map[string]*typedef.PodInfo { ++ return make(map[string]*typedef.PodInfo, 1) ++} ++ ++// TestManagerRun creates a fake manager and runs it ++func TestManagerRun(t *testing.T) { ++ nm := NewManager("psi") ++ nm.conf.Interval = 1 ++ nm.PreStart(&FakeManager{}) ++ nm.SetConfig(func(configName string, d interface{}) error { return nil }) ++ if !nm.IsRunner() { ++ t.Fatalf("FakeManager is not a runner!") ++ return ++ } ++ ++ ctx, cancel := context.WithCancel(context.Background()) ++ go nm.Run(ctx) ++ time.Sleep(time.Second) ++ cancel() ++} +diff --git a/pkg/services/service_test.go b/pkg/services/service_test.go +index a6e0298..537d0b3 100644 +--- a/pkg/services/service_test.go ++++ b/pkg/services/service_test.go +@@ -36,6 +36,10 @@ var defaultFeature = []FeatureSpec{ + Name: feature.QuotaTurboFeature, + Default: true, + }, ++ { ++ Name: feature.PSIFeature, ++ Default: true, ++ }, + } + + func TestErrorInitServiceComponents(t *testing.T) { +-- +2.32.1 (Apple Git-133) + diff --git a/patch/0004-rubik-add-psi-design-documentation.patch b/patch/0004-rubik-add-psi-design-documentation.patch new file mode 100644 index 0000000..6c83e36 --- /dev/null +++ b/patch/0004-rubik-add-psi-design-documentation.patch @@ -0,0 +1,199 @@ +From c23aa3e37e0bffc42c6233d0eb89a5c56f1fc77b Mon Sep 17 00:00:00 2001 +From: vegbir +Date: 
Sat, 10 Jun 2023 11:41:04 +0800 +Subject: [PATCH 4/7] rubik: add psi design documentation + +Signed-off-by: vegbir +--- + CHANGELOG/CHANGELOG-2.0.0.md | 29 +++++++-- + docs/design/psi.md | 94 +++++++++++++++++++++++++++++ + docs/images/psi/PSI_designation.svg | 16 +++++ + docs/images/psi/PSI_implement.svg | 4 ++ + 4 files changed, 139 insertions(+), 4 deletions(-) + create mode 100644 docs/design/psi.md + create mode 100644 docs/images/psi/PSI_designation.svg + create mode 100644 docs/images/psi/PSI_implement.svg + +diff --git a/CHANGELOG/CHANGELOG-2.0.0.md b/CHANGELOG/CHANGELOG-2.0.0.md +index 5cc2cb8..b46fa3d 100644 +--- a/CHANGELOG/CHANGELOG-2.0.0.md ++++ b/CHANGELOG/CHANGELOG-2.0.0.md +@@ -1,16 +1,37 @@ +-1. Architecture optimization: ++# CHANGELOG ++ ++## v2.0.1 ++ ++### New Feature ++ ++Before June 30, 2023 ++ ++1. **dynMemory** (asynchronous memory classification recovery): implement fssr strategy ++2. **psi**: interference detection based on PSI index ++3. **quotaTurbo**: elastic cpu limit user mode solution ++ ++## v2.0.0 ++ ++### Architecture optimization ++ + refactor rubik through `informer-podmanager-services` mechanism, decoupling modules and improving performance +-2. Interface change: ++ ++### Interface change ++ + - configuration file changes + - use the list-watch mechanism to get the pod instead of the http interface +-3. Feature enhancements: ++ ++### Feature enhancements ++ + - support elastic cpu limit user mode scheme-quotaturbo + - support psi index observation + - support memory asynchronous recovery feature (fssr optimization) + - support memory access bandwidth and LLC limit + - optimize the absolute preemption + - optimize the elastic cpu limiting kernel mode scheme-quotaburst +-4. Other optimizations: ++ ++### Other optimizations ++ + - document optimization + - typo fix + - compile option optimization +diff --git a/docs/design/psi.md b/docs/design/psi.md +new file mode 100644 +index 0000000..674a8e0 +--- /dev/null ++++ b/docs/design/psi.md +@@ -0,0 +1,94 @@ ++# 【需求设计】基于PSI指标的干扰检测 ++ ++## 需求设计图 ++ ++![PSI_designation](../images/psi/PSI_designation.svg) ++ ++## 实现思路 ++ ++### PSI简介 ++ ++PSI是Pressure Stall Information的简称,用于评估当前系统三大基础硬件资源CPU、Memory、IO的压力。顾名思义,当进程无法获得运行所需的资源时将会产生停顿,PSI就是衡量进程停顿时间长度的度量标准。 ++ ++### 使能cgroupv1 psi特性 ++ ++首先,检查是否开启cgroup v1的PSI。两种方法,看看文件是否存在或者查看内核启动命令行是否包含psi相关选项。 ++ ++```bash ++cat /proc/cmdline | grep "psi=1 psi_v1=1" ++``` ++ ++若无,则新增内核启动命令行 ++ ++```bash ++# 查看内核版本号 ++uname -a ++# Linux openEuler 5.10.0-136.12.0.86.oe2203sp1.x86_64 #1 ++# 找到内核的boot文件 ++ls /boot/vmlinuz-5.10.0-136.12.0.86.oe2203sp1.x86_64 ++# 新增参数 ++grubby --update-kernel="/boot/vmlinuz-5.10.0-136.12.0.86.oe2203sp1.x86_64" --args="psi=1 psi_v1=1" ++# 重启 ++reboot ++``` ++ ++随后便可以在cgroup v1中使用psi的三个文件观测数据。 ++例如,在`/sys/fs/cgroup/cpu,cpuacct/kubepods/burstable//`目录下,涉及如下文件: ++ ++- cpu.pressure ++- memory.pressure ++- io.pressure ++ ++### 方案流程 ++ ++针对PSI格式数据,使用`some avg10`作为观测指标。它表示任一任务在10s内的平均阻塞时间占比。 ++ ++用户通过配置阈值保障在线Pod的资源可用以及高性能。具体来说,当阻塞占比超过某一阈值(默认为5%),则rubik按照一定策略驱逐离线Pod,释放相应资源。 ++ ++在离线业务由注解`volcano.sh/preemptable="true"/"false"`标识。 ++ ++```yaml ++annotations: ++ volcano.sh/preemptable: true ++``` ++ ++在线Pod的CPU和内存利用率偏高,rubik会驱逐当前占用CPU资源/内存资源最多的离线业务。若离线业务I/O高,则会选择驱逐CPU资源占用最多的离线业务。 ++> 注1:当前cgroup控制io带宽手段有效,难以精准判断驱逐哪个业务会降低io,因此暂时采用CPU利用率作为标准。 ++> ++> 注2:通过cadvisor库实时获取离线业务的CPU利用率、内存占用量、IO带宽等信息,按指标从大到小排序。 ++ ++需要处理可疑对象时则通过责任链设计模式传递事件处理请求,并执行相应操作。 ++ ++## 实现设计 ++ ++![PSI_implement](../images/psi/PSI_implement.svg) ++ ++## 接口设计 ++ ++```yaml ++data: ++ config.json: | ++ { 
++ "agent": { ++ "enabledFeatures": [ ++ "psi" ++ ] ++ }, ++ "psi": { ++ "resource": [ ++ "cpu", ++ "memory", ++ "io", ++ ], ++ "interval": 10 ++ } ++ } ++``` ++ ++`psi`字段用于标识基于psi指标的干扰检测特性配置。目前,psi特性支持监测CPU、内存和I/O资源,用户可以按需配置该字段,单独或组合监测资源的PSI取值。 ++ ++| 配置键[=默认值] | 类型 | 描述 | 可选值 | ++| --------------- | ---------- | -------------------------------- | ----------- | ++| interval=10 |int|psi指标监测间隔(单位:秒)| [10,30]| ++| resource=[] | string数组 | 资源类型,声明何种资源需要被访问 | cpu, memory, io | ++| avg10Threshold=5.0 | float | psi some类型资源平均10s内的压制百分比阈值(单位:%),超过该阈值则驱逐离线业务 | [5.0,100]| +diff --git a/docs/images/psi/PSI_designation.svg b/docs/images/psi/PSI_designation.svg +new file mode 100644 +index 0000000..8b829e8 +--- /dev/null ++++ b/docs/images/psi/PSI_designation.svg +@@ -0,0 +1,16 @@ ++ ++ ++ ++ ++ ++ ++ 开始遍历在线Pod列表读取并解析Pod PSI指标是否支持cgroupV1 PSI接口?/sys/fs/cgroup/cpuacct/cpu.pressure...io.pressure ...memory.pressure标记PSI指标最大值分别为cpu_max、mem_max、io_maxcpu_max >= threshold?mem_max >= threshold?io_max >= threshold?按照CPU利用率对离线业务进行排序按照内存占用量对离线业务进行排序按io带宽对离线业务进行排序处理可疑对象日志告警应用驱逐 +\ No newline at end of file +diff --git a/docs/images/psi/PSI_implement.svg b/docs/images/psi/PSI_implement.svg +new file mode 100644 +index 0000000..9704504 +--- /dev/null ++++ b/docs/images/psi/PSI_implement.svg +@@ -0,0 +1,4 @@ ++ ++ ++ ++cadvisor

[PSI_implement.svg embedded text: class diagram of the PSI Manager implementation — cadvisor Manager (New, Start, ContainerInfoV2); Metric/BaseMetric and BasePSIMetric (avg10Threshold, resources, suspicion pods, Update); PSI Manager (PSIConfig, Viewer; IsRunner, Run, SetConfig, PreStart, Terminate); Analyzer interface (MaxCPUUtil, MaxIOBandWidth, MaxMemUtil); Trigger/TriggerFactor interfaces; expulsionExec and resourceAnalysisExec singletons]
+\ No newline at end of file +-- +2.32.1 (Apple Git-133) + diff --git a/patch/0005-rubik-move-fssr-design-document-to-design-dir.patch b/patch/0005-rubik-move-fssr-design-document-to-design-dir.patch new file mode 100644 index 0000000..410df3c --- /dev/null +++ b/patch/0005-rubik-move-fssr-design-document-to-design-dir.patch @@ -0,0 +1,83 @@ +From c74d87538bc27a20c24d27319c5d60970ab3ccab Mon Sep 17 00:00:00 2001 +From: vegbir +Date: Wed, 14 Jun 2023 17:14:16 +0800 +Subject: [PATCH 5/7] rubik: move fssr design document to design dir + +Signed-off-by: vegbir +--- + docs/{ => design}/fssr.md | 18 ++++++++++++++---- + .../fssr/flowchart.png} | Bin + .../fssr/sequence_diagram.png} | Bin + 3 files changed, 14 insertions(+), 4 deletions(-) + rename docs/{ => design}/fssr.md (90%) + rename docs/{png/rubik_fssr_2.png => images/fssr/flowchart.png} (100%) + rename docs/{png/rubik_fssr_1.png => images/fssr/sequence_diagram.png} (100%) + +diff --git a/docs/fssr.md b/docs/design/fssr.md +similarity index 90% +rename from docs/fssr.md +rename to docs/design/fssr.md +index 3fb36bd..184b364 100644 +--- a/docs/fssr.md ++++ b/docs/design/fssr.md +@@ -1,20 +1,27 @@ ++# 【需求设计】异步内存分级回收 fssr策略 ++ + ## 方案目标 ++ + 在混部集群中,在线和离线业务被同时部署到同一物理资源(节点)上,同时离线业务是内存资源消耗型,在线业务有波峰波谷,在离线业务之间内存资源竞争导致在线业务受影响。该方案目标在充分利用内存资源的同时保证在线QoS。 + + ## 总体设计 ++ + 各个模块之间的联系如下: +-![](png/rubik_fssr_1.png) ++![sequence_diagram](../images/fssr/sequence_diagram.png) + + - 用户部署rubik,rubik向k8s注册监听pod事件。 + - 当离线业务被部署时k8s会通知rubik,rubik向该离线pod配置memory.high。 + - 同时rubik实时监控当前节点的内存使用量,使用fssr策略向pod配置memory.high。 + + ### 依赖说明 ++ + 内核需要支持memcg级内存水位线方案,即提供`memory.high`和`memory.high_async_ratio`。 + + ### 详细设计 ++ + 内存分级方案中,rubik新增FSSR内存处理模块,该模块主要处理获取主机(节点)的总内存(total memory)、预留内存(reserved memory)、剩余内存(free memory)。并根据FSSR算法设置离线内存的memory.high。具体策略如下: +-![](png/rubik_fssr_2.png) ++![flowchart](../images/fssr/flowchart.png) ++ + - rubik启动时计算预留内存,默认为总内存的10%,如果总内存的10%超过10G,则为10G + - 配置离线容器的cgroup级别水位线,内核提供`memory.high`和`memory.high_async_ratio`两个接口,分别配置cgroup的软上限和警戒水位线。启动rubik时默认配置`memory.high`为`total_memory`(总内存)`*`80% + - 获取剩余内存free_memory +@@ -22,13 +29,16 @@ + - 持续一分钟free_memory>2`*`reserved_memory时提高离线的memory.high,每次提升总内存的1%,total_memory`*`1% + + 说明: ++ + 1. 
离线应用memory.high的范围为`[total_memory*30%, total_memory*80%]` + + ### 配置说明 +-``` ++ ++```json + "dynMemory": { + "policy": "fssr" + } + ``` ++ + - dynMemory表示动态内存 +-- policy目前只支持fssr +\ No newline at end of file ++- policy目前只支持fssr +diff --git a/docs/png/rubik_fssr_2.png b/docs/images/fssr/flowchart.png +similarity index 100% +rename from docs/png/rubik_fssr_2.png +rename to docs/images/fssr/flowchart.png +diff --git a/docs/png/rubik_fssr_1.png b/docs/images/fssr/sequence_diagram.png +similarity index 100% +rename from docs/png/rubik_fssr_1.png +rename to docs/images/fssr/sequence_diagram.png +-- +2.32.1 (Apple Git-133) + diff --git a/patch/0006-rubik-fix-that-value-of-memory.high_async_ratio-lost.patch b/patch/0006-rubik-fix-that-value-of-memory.high_async_ratio-lost.patch new file mode 100644 index 0000000..6d8d083 --- /dev/null +++ b/patch/0006-rubik-fix-that-value-of-memory.high_async_ratio-lost.patch @@ -0,0 +1,246 @@ +From 526bd12a3b77135ce5f112f3195f1e7d41d965d5 Mon Sep 17 00:00:00 2001 +From: hanchao +Date: Fri, 16 Jun 2023 13:10:42 +0800 +Subject: [PATCH 6/7] rubik: fix that value of memory.high_async_ratio lost + efficacy + +--- + pkg/services/dynmemory/dynmemory.go | 22 +++++++- + pkg/services/dynmemory/fssr.go | 78 +++++++++++++++++------------ + 2 files changed, 67 insertions(+), 33 deletions(-) + +diff --git a/pkg/services/dynmemory/dynmemory.go b/pkg/services/dynmemory/dynmemory.go +index da859dd..b73f476 100644 +--- a/pkg/services/dynmemory/dynmemory.go ++++ b/pkg/services/dynmemory/dynmemory.go +@@ -6,6 +6,7 @@ import ( + "time" + + "isula.org/rubik/pkg/api" ++ "isula.org/rubik/pkg/core/typedef" + "isula.org/rubik/pkg/services/helper" + "k8s.io/apimachinery/pkg/util/wait" + ) +@@ -15,6 +16,7 @@ type DynMemoryAdapter interface { + preStart(api.Viewer) error + getInterval() int + dynamicAdjust() ++ setOfflinePod(path string) error + } + type dynMemoryConfig struct { + Policy string `json:"policy,omitempty"` +@@ -42,11 +44,11 @@ type DynMemory struct { + } + + // PreStart is an interface for calling a collection of methods when the service is pre-started +-func (dynMem *DynMemory) PreStart(api api.Viewer) error { ++func (dynMem *DynMemory) PreStart(viewer api.Viewer) error { + if dynMem.dynMemoryAdapter == nil { + return nil + } +- return dynMem.dynMemoryAdapter.preStart(api) ++ return dynMem.dynMemoryAdapter.preStart(viewer) + } + + // SetConfig is an interface that invoke the ConfigHandler to obtain the corresponding configuration. +@@ -81,6 +83,22 @@ func (dynMem *DynMemory) IsRunner() bool { + return true + } + ++// AddPod to deal the event of adding a pod. ++func (dynMem *DynMemory) AddPod(podInfo *typedef.PodInfo) error { ++ if podInfo.Offline() { ++ return dynMem.dynMemoryAdapter.setOfflinePod(podInfo.Path) ++ } ++ return nil ++} ++ ++// UpdatePod to deal the pod update event. ++func (dynMem *DynMemory) UpdatePod(old, new *typedef.PodInfo) error { ++ if new.Offline() { ++ return dynMem.dynMemoryAdapter.setOfflinePod(new.Path) ++ } ++ return nil ++} ++ + // newAdapter to create adapter of dyn memory. 
+ func newAdapter(policy string) DynMemoryAdapter { + switch policy { +diff --git a/pkg/services/dynmemory/fssr.go b/pkg/services/dynmemory/fssr.go +index 9fe4042..e23a4bc 100644 +--- a/pkg/services/dynmemory/fssr.go ++++ b/pkg/services/dynmemory/fssr.go +@@ -9,6 +9,7 @@ import ( + + "isula.org/rubik/pkg/api" + "isula.org/rubik/pkg/common/constant" ++ "isula.org/rubik/pkg/common/log" + "isula.org/rubik/pkg/common/util" + "isula.org/rubik/pkg/core/typedef" + "isula.org/rubik/pkg/core/typedef/cgroup" +@@ -30,71 +31,76 @@ type fssrDynMemAdapter struct { + memTotal int64 + memHigh int64 + reservedMem int64 +- api api.Viewer + count int64 ++ viewer api.Viewer + } + +-// initFssrDynMemAdapter function ++// initFssrDynMemAdapter initializes a new fssrDynMemAdapter struct. + func initFssrDynMemAdapter() *fssrDynMemAdapter { + if total, err := getFieldMemory("MemTotal"); err == nil && total > 0 { + return &fssrDynMemAdapter{ + memTotal: total, + memHigh: total * 8 / 10, +- reservedMem: total * 8 / 10, ++ reservedMem: total * 1 / 10, ++ count: 0, + } + } + return nil + } + +-// preStart function +-func (f *fssrDynMemAdapter) preStart(api api.Viewer) error { +- f.api = api ++// preStart initializes the fssrDynMemAdapter with the provided viewer and ++// deals with any existing pods. ++func (f *fssrDynMemAdapter) preStart(viewer api.Viewer) error { ++ f.viewer = viewer + return f.dealExistedPods() + } + +-// getInterval function ++// getInterval returns the fssrInterval value. + func (f *fssrDynMemAdapter) getInterval() int { + return fssrInterval + } + +-// dynadjust function ++// dynamicAdjust adjusts the memory allocation of the fssrDynMemAdapter by ++// increasing or decreasing the amount of memory reserved for offline pods ++// based on the current amount of free memory available on the system. + func (f *fssrDynMemAdapter) dynamicAdjust() { + var freeMem int64 + var err error + if freeMem, err = getFieldMemory("MemFree"); err != nil { + return + } ++ ++ var memHigh int64 = 0 + if freeMem > 2*f.reservedMem { + if f.count < fssrIntervalCount { + f.count++ + return + } +- memHigh := f.memHigh + f.memTotal/100 ++ // no risk of overflow ++ memHigh = f.memHigh + f.memTotal/100 + if memHigh > f.memTotal*8/10 { + memHigh = f.memTotal * 8 / 10 + } +- if memHigh != f.memHigh { +- f.memHigh = memHigh +- f.adjustOfflinePodHighMemory() +- } + } else if freeMem < f.reservedMem { +- memHigh := f.memHigh - f.memTotal/10 ++ memHigh = f.memHigh - f.memTotal/10 + if memHigh < 0 { + return + } + if memHigh < f.memTotal*3/10 { + memHigh = f.memTotal * 3 / 10 + } +- if memHigh != f.memHigh { +- f.memHigh = memHigh +- f.adjustOfflinePodHighMemory() +- } + } ++ if memHigh != f.memHigh { ++ f.memHigh = memHigh ++ f.adjustOfflinePodHighMemory() ++ } ++ + f.count = 0 + } + ++// adjustOfflinePodHighMemory adjusts the memory.high of offline pods. 
+ func (f *fssrDynMemAdapter) adjustOfflinePodHighMemory() error { +- pods := listOfflinePods(f.api) ++ pods := listOfflinePods(f.viewer) + for _, podInfo := range pods { + if err := setOfflinePodHighMemory(podInfo.Path, f.memHigh); err != nil { + return err +@@ -103,20 +109,18 @@ func (f *fssrDynMemAdapter) adjustOfflinePodHighMemory() error { + return nil + } + +-// dealExistedPods function ++// dealExistedPods handles offline pods by setting their memory.high and memory.high_async_ratio + func (f *fssrDynMemAdapter) dealExistedPods() error { +- pods := listOfflinePods(f.api) ++ pods := listOfflinePods(f.viewer) + for _, podInfo := range pods { +- if err := setOfflinePodHighMemory(podInfo.Path, f.memHigh); err != nil { +- return err +- } +- if err := setOfflinePodHighAsyncRatio(podInfo.Path, highRatio); err != nil { +- return err ++ if err := f.setOfflinePod(podInfo.Path); err != nil { ++ log.Errorf("set fssr of offline pod[%v] error:%v", podInfo.UID, err) + } + } + return nil + } + ++// listOfflinePods returns a map of offline PodInfo objects. + func listOfflinePods(viewer api.Viewer) map[string]*typedef.PodInfo { + offlineValue := "true" + return viewer.ListPodsWithOptions(func(pi *typedef.PodInfo) bool { +@@ -124,23 +128,35 @@ func listOfflinePods(viewer api.Viewer) map[string]*typedef.PodInfo { + }) + } + +-func setOfflinePodHighMemory(podPath string, high int64) error { +- if err := cgroup.WriteCgroupFile(strconv.FormatUint(uint64(high), scale), memcgRootDir, ++// setOfflinePod sets the offline pod for the given path. ++func (f *fssrDynMemAdapter) setOfflinePod(path string) error { ++ if err := setOfflinePodHighAsyncRatio(path, highRatio); err != nil { ++ return err ++ } ++ return setOfflinePodHighMemory(path, f.memHigh) ++} ++ ++// setOfflinePodHighMemory sets the high memory limit for the specified pod in the ++// cgroup memory ++func setOfflinePodHighMemory(podPath string, memHigh int64) error { ++ if err := cgroup.WriteCgroupFile(strconv.FormatUint(uint64(memHigh), scale), memcgRootDir, + podPath, highMemFile); err != nil { + return err + } + return nil + } + +-func setOfflinePodHighAsyncRatio(podPath string, ratio uint64) error { +- if err := cgroup.WriteCgroupFile(strconv.FormatUint(ratio, scale), memcgRootDir, ++// setOfflinePodHighAsyncRatio sets the high memory async ratio for a pod in an offline state. ++func setOfflinePodHighAsyncRatio(podPath string, ratio uint) error { ++ if err := cgroup.WriteCgroupFile(strconv.FormatUint(uint64(ratio), scale), memcgRootDir, + podPath, highMemAsyncRatioFile); err != nil { + return err + } + return nil + } + +-// getFieldMemory function ++// getFieldMemory retrieves the amount of memory used by a certain field in the ++// memory information file. 
+ func getFieldMemory(field string) (int64, error) { + if !util.PathExist(memInfoFile) { + return 0, fmt.Errorf("%v: no such file or diretory", memInfoFile) +-- +2.32.1 (Apple Git-133) + diff --git a/patch/0007-bugfix-fix-typos-calling-order-of-waitgroup.patch b/patch/0007-bugfix-fix-typos-calling-order-of-waitgroup.patch new file mode 100644 index 0000000..408aa55 --- /dev/null +++ b/patch/0007-bugfix-fix-typos-calling-order-of-waitgroup.patch @@ -0,0 +1,127 @@ +From 6b9d862857a1b302b26d8d51e7df5fed3062ba94 Mon Sep 17 00:00:00 2001 +From: vegbir +Date: Mon, 19 Jun 2023 11:47:53 +0800 +Subject: [PATCH 7/7] bugfix: fix typos & calling order of waitgroup + +Signed-off-by: vegbir +--- + pkg/core/trigger/base.go | 4 ++-- + pkg/core/trigger/expulsion.go | 2 +- + pkg/core/trigger/resourceanalysis.go | 6 +++--- + pkg/rubik/servicemanager.go | 6 +++--- + 4 files changed, 9 insertions(+), 9 deletions(-) + +diff --git a/pkg/core/trigger/base.go b/pkg/core/trigger/base.go +index 7f1fbe9..c212f66 100644 +--- a/pkg/core/trigger/base.go ++++ b/pkg/core/trigger/base.go +@@ -70,8 +70,8 @@ type TreeTrigger struct { + subTriggers []Trigger + } + +-// withTreeTirgger returns a BaseMetric object +-func withTreeTirgger(name string, exec Executor) *TreeTrigger { ++// withTreeTrigger returns a BaseMetric object ++func withTreeTrigger(name string, exec Executor) *TreeTrigger { + return &TreeTrigger{ + name: name, + exec: exec, +diff --git a/pkg/core/trigger/expulsion.go b/pkg/core/trigger/expulsion.go +index 87dd484..e438d3d 100644 +--- a/pkg/core/trigger/expulsion.go ++++ b/pkg/core/trigger/expulsion.go +@@ -41,7 +41,7 @@ var expulsionCreator = func() Trigger { + appendUsedExecutors(ExpulsionAnno, expulsionExec) + } + } +- return withTreeTirgger(ExpulsionAnno, expulsionExec) ++ return withTreeTrigger(ExpulsionAnno, expulsionExec) + } + + // Expulsion is the trigger to evict pods +diff --git a/pkg/core/trigger/resourceanalysis.go b/pkg/core/trigger/resourceanalysis.go +index a3d99e5..7e7413e 100644 +--- a/pkg/core/trigger/resourceanalysis.go ++++ b/pkg/core/trigger/resourceanalysis.go +@@ -49,7 +49,7 @@ var analyzerCreator = func() Trigger { + appendUsedExecutors(ResourceAnalysisAnno, resourceAnalysisExec) + } + } +- return withTreeTirgger(ResourceAnalysisAnno, resourceAnalysisExec) ++ return withTreeTrigger(ResourceAnalysisAnno, resourceAnalysisExec) + } + + // rreqOpt is the option to get information from cadvisor +@@ -161,7 +161,7 @@ func (a *Analyzer) maxCPUUtil(pods map[string]*typedef.PodInfo) *typedef.PodInfo + } + } + if chosen != nil { +- log.Infof("find the max cpu util pod \"%v\": %v", chosen.Name, maxUtil) ++ log.Infof("find the pod(%v) with the highest cpu utilization(%v)", chosen.Name, maxUtil) + } + return chosen + } +@@ -185,7 +185,7 @@ func (a *Analyzer) maxMemoryUtil(pods map[string]*typedef.PodInfo) *typedef.PodI + } + } + if chosen != nil { +- log.Infof("find the max cpu util pod \"%v\": %v", chosen.Name, maxUtil) ++ log.Infof("find the pod(%v) with the highest memory utilization(%v)", chosen.Name, maxUtil) + } + return chosen + } +diff --git a/pkg/rubik/servicemanager.go b/pkg/rubik/servicemanager.go +index 3e162b6..c3b252a 100644 +--- a/pkg/rubik/servicemanager.go ++++ b/pkg/rubik/servicemanager.go +@@ -218,7 +218,6 @@ func (manager *ServiceManager) addFunc(event typedef.Event) { + + const retryCount = 5 + addOnce := func(s services.Service, podInfo *typedef.PodInfo, wg *sync.WaitGroup) { +- wg.Add(1) + for i := 0; i < retryCount; i++ { + if err := s.AddPod(podInfo); err != nil { + 
log.Errorf("service %s add func failed: %v", s.ID(), err) +@@ -231,6 +230,7 @@ func (manager *ServiceManager) addFunc(event typedef.Event) { + manager.RLock() + var wg sync.WaitGroup + for _, s := range manager.RunningServices { ++ wg.Add(1) + go addOnce(s, podInfo.DeepCopy(), &wg) + } + wg.Wait() +@@ -250,7 +250,6 @@ func (manager *ServiceManager) updateFunc(event typedef.Event) { + return + } + runOnce := func(s services.Service, old, new *typedef.PodInfo, wg *sync.WaitGroup) { +- wg.Add(1) + log.Debugf("update Func with service: %s", s.ID()) + if err := s.UpdatePod(old, new); err != nil { + log.Errorf("service %s update func failed: %v", s.ID(), err) +@@ -260,6 +259,7 @@ func (manager *ServiceManager) updateFunc(event typedef.Event) { + manager.RLock() + var wg sync.WaitGroup + for _, s := range manager.RunningServices { ++ wg.Add(1) + go runOnce(s, podInfos[0], podInfos[1], &wg) + } + wg.Wait() +@@ -275,7 +275,6 @@ func (manager *ServiceManager) deleteFunc(event typedef.Event) { + } + + deleteOnce := func(s services.Service, podInfo *typedef.PodInfo, wg *sync.WaitGroup) { +- wg.Add(1) + if err := s.DeletePod(podInfo); err != nil { + log.Errorf("service %s delete func failed: %v", s.ID(), err) + } +@@ -284,6 +283,7 @@ func (manager *ServiceManager) deleteFunc(event typedef.Event) { + manager.RLock() + var wg sync.WaitGroup + for _, s := range manager.RunningServices { ++ wg.Add(1) + go deleteOnce(s, podInfo.DeepCopy(), &wg) + } + wg.Wait() +-- +2.32.1 (Apple Git-133) + diff --git a/rubik.spec b/rubik.spec index 1917d8f..14ccc32 100644 --- a/rubik.spec +++ b/rubik.spec @@ -1,6 +1,6 @@ Name: rubik Version: 2.0.0 -Release: 1 +Release: 2 Summary: Hybrid Deployment for Cloud Native License: Mulan PSL V2 URL: https://gitee.com/openeuler/rubik @@ -56,6 +56,12 @@ install -Dp ./build_rubik_image.sh %{buildroot}%{_sharedstatedir}/%{name}/build_ rm -rf %{buildroot} %changelog +* Mon Jun 19 2023 vegbir - 2.0.0-2 +- Type:bugfix +- CVE:NA +- SUG:restart +- DESC:sync upstream patches + * Wed May 24 2023 vegbir - 2.0.0-1 - Type:bugfix - CVE:NA diff --git a/series.conf b/series.conf index 026f14c..74006cc 100644 --- a/series.conf +++ b/series.conf @@ -1 +1,8 @@ +patch/0001-Support-Labels-field-to-configure-QoSLevel.patch +patch/0002-rubik-fix-weight-for-iocost-does-not-take-effect.patch +patch/0003-rubik-test-coverage-for-PSI-Manager.patch +patch/0004-rubik-add-psi-design-documentation.patch +patch/0005-rubik-move-fssr-design-document-to-design-dir.patch +patch/0006-rubik-fix-that-value-of-memory.high_async_ratio-lost.patch +patch/0007-bugfix-fix-typos-calling-order-of-waitgroup.patch #end of file