!30 sync upstream patches

From: @vegbir 
Reviewed-by: @duguhaotian, @jingwoo 
Signed-off-by: @duguhaotian
This commit is contained in:
openeuler-ci-bot 2023-06-19 09:28:18 +00:00 committed by Gitee
commit d266585686
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
11 changed files with 1192 additions and 3 deletions

View File

@ -1 +1 @@
2.0.0-1
2.0.0-2

View File

@ -1 +1 @@
37e6484adbbb01802e969ccf640e5232d974b2fb
8eb2e8d8f046224de4cd37bb404ecc261668a6b3

View File

@ -0,0 +1,188 @@
From b8e000527f7129242bd71f9c79697bef3a8b2111 Mon Sep 17 00:00:00 2001
From: wujing <wujing50@huawei.com>
Date: Wed, 10 May 2023 19:26:36 +0800
Subject: [PATCH 1/7] Support Labels field to configure QoSLevel
Signed-off-by: wujing <wujing50@huawei.com>
---
pkg/core/typedef/podinfo.go | 39 +++++++++++++++++++++++++--
pkg/services/dyncache/dynamic.go | 3 +--
pkg/services/dyncache/sync.go | 3 +--
pkg/services/iocost/iocost.go | 2 +-
pkg/services/preemption/preemption.go | 13 +++------
tests/try/pod.go | 1 +
6 files changed, 44 insertions(+), 17 deletions(-)
diff --git a/pkg/core/typedef/podinfo.go b/pkg/core/typedef/podinfo.go
index 907f02b..fd96848 100644
--- a/pkg/core/typedef/podinfo.go
+++ b/pkg/core/typedef/podinfo.go
@@ -15,6 +15,7 @@
package typedef
import (
+ "isula.org/rubik/pkg/common/constant"
"isula.org/rubik/pkg/core/typedef/cgroup"
)
@@ -26,6 +27,7 @@ type PodInfo struct {
Namespace string `json:"namespace"`
IDContainersMap map[string]*ContainerInfo `json:"containers,omitempty"`
Annotations map[string]string `json:"annotations,omitempty"`
+ Labels map[string]string `json:"labels,omitempty"`
}
// NewPodInfo creates the PodInfo instance
@@ -37,6 +39,7 @@ func NewPodInfo(pod *RawPod) *PodInfo {
Hierarchy: cgroup.Hierarchy{Path: pod.CgroupPath()},
IDContainersMap: pod.ExtractContainerInfos(),
Annotations: pod.DeepCopy().Annotations,
+ Labels: pod.DeepCopy().Labels,
}
}
@@ -46,8 +49,9 @@ func (pod *PodInfo) DeepCopy() *PodInfo {
return nil
}
var (
- contMap map[string]*ContainerInfo
- annoMap map[string]string
+ contMap map[string]*ContainerInfo
+ annoMap map[string]string
+ labelMap map[string]string
)
// nil is different from empty value in golang
if pod.IDContainersMap != nil {
@@ -56,6 +60,7 @@ func (pod *PodInfo) DeepCopy() *PodInfo {
contMap[id] = cont.DeepCopy()
}
}
+
if pod.Annotations != nil {
annoMap = make(map[string]string)
for k, v := range pod.Annotations {
@@ -63,12 +68,42 @@ func (pod *PodInfo) DeepCopy() *PodInfo {
}
}
+ if pod.Labels != nil {
+ labelMap = make(map[string]string)
+ for k, v := range pod.Labels {
+ labelMap[k] = v
+ }
+ }
+
return &PodInfo{
Name: pod.Name,
UID: pod.UID,
Hierarchy: pod.Hierarchy,
Namespace: pod.Namespace,
Annotations: annoMap,
+ Labels: labelMap,
IDContainersMap: contMap,
}
}
+
+// Offline is used to determine whether the pod is offline
+func (pod *PodInfo) Offline() bool {
+ var anno string
+ var label string
+
+ if pod.Annotations != nil {
+ anno = pod.Annotations[constant.PriorityAnnotationKey]
+ }
+
+ if pod.Labels != nil {
+ label = pod.Labels[constant.PriorityAnnotationKey]
+ }
+
+ // The pod is offline if either its annotation or its label marks it as "true"
+ return anno == "true" || label == "true"
+}
+
+// Online is used to determine whether the pod is online
+func (pod *PodInfo) Online() bool {
+ return !pod.Offline()
+}
diff --git a/pkg/services/dyncache/dynamic.go b/pkg/services/dyncache/dynamic.go
index 09bde4c..d74efc7 100644
--- a/pkg/services/dyncache/dynamic.go
+++ b/pkg/services/dyncache/dynamic.go
@@ -124,8 +124,7 @@ func (c *DynCache) doFlush(limitSet *limitSet) error {
}
func (c *DynCache) listOnlinePods() map[string]*typedef.PodInfo {
- onlineValue := "false"
return c.Viewer.ListPodsWithOptions(func(pi *typedef.PodInfo) bool {
- return pi.Annotations[constant.PriorityAnnotationKey] == onlineValue
+ return pi.Online()
})
}
diff --git a/pkg/services/dyncache/sync.go b/pkg/services/dyncache/sync.go
index 8307c41..bf59cd4 100644
--- a/pkg/services/dyncache/sync.go
+++ b/pkg/services/dyncache/sync.go
@@ -111,8 +111,7 @@ func (c *DynCache) syncLevel(pod *typedef.PodInfo) error {
}
func (c *DynCache) listOfflinePods() map[string]*typedef.PodInfo {
- offlineValue := "true"
return c.Viewer.ListPodsWithOptions(func(pi *typedef.PodInfo) bool {
- return pi.Annotations[constant.PriorityAnnotationKey] == offlineValue
+ return pi.Offline()
})
}
diff --git a/pkg/services/iocost/iocost.go b/pkg/services/iocost/iocost.go
index e5298b1..c11ef60 100644
--- a/pkg/services/iocost/iocost.go
+++ b/pkg/services/iocost/iocost.go
@@ -236,7 +236,7 @@ func (b *IOCost) clearIOCost() error {
func (b *IOCost) configPodIOCostWeight(podInfo *typedef.PodInfo) error {
var weight uint64 = offlineWeight
- if podInfo.Annotations[constant.PriorityAnnotationKey] == "false" {
+ if podInfo.Online() {
weight = onlineWeight
}
for _, container := range podInfo.IDContainersMap {
diff --git a/pkg/services/preemption/preemption.go b/pkg/services/preemption/preemption.go
index ce436a3..28ec36e 100644
--- a/pkg/services/preemption/preemption.go
+++ b/pkg/services/preemption/preemption.go
@@ -160,18 +160,11 @@ func getQoSLevel(pod *typedef.PodInfo) int {
if pod == nil {
return constant.Online
}
- anno, ok := pod.Annotations[constant.PriorityAnnotationKey]
- if !ok {
- return constant.Online
- }
- switch anno {
- case "true":
+ if pod.Offline() {
return constant.Offline
- case "false":
- return constant.Online
- default:
- return constant.Online
}
+
+ return constant.Online
}
// Validate will validate the qos service config
diff --git a/tests/try/pod.go b/tests/try/pod.go
index 18cb0ec..8053c4b 100644
--- a/tests/try/pod.go
+++ b/tests/try/pod.go
@@ -60,6 +60,7 @@ func GenFakePodInfo(qosClass corev1.PodQOSClass) *typedef.PodInfo {
UID: constant.PodCgroupNamePrefix + podID,
Hierarchy: cgroup.Hierarchy{Path: genRelativeCgroupPath(qosClass, podID)},
Annotations: make(map[string]string, 0),
+ Labels: make(map[string]string, 0),
}
return fakePod
}
--
2.32.1 (Apple Git-133)

View File

@ -0,0 +1,69 @@
From 804ff7873331cf745bc49aab5f5d2857ec1597c6 Mon Sep 17 00:00:00 2001
From: hanchao <hanchao63@huawei.com>
Date: Mon, 5 Jun 2023 13:56:01 +0800
Subject: [PATCH 2/7] rubik: fix weight for iocost does not take effect
reason: Fix weight for iocost does not take effect.
The iocost weight is at pod level, not container
level.
---
pkg/services/iocost/iocost.go | 7 +------
pkg/services/iocost/iocost_origin.go | 8 ++++----
pkg/services/iocost/iocost_test.go | 2 +-
3 files changed, 6 insertions(+), 11 deletions(-)
diff --git a/pkg/services/iocost/iocost.go b/pkg/services/iocost/iocost.go
index e5298b1..34f508a 100644
--- a/pkg/services/iocost/iocost.go
+++ b/pkg/services/iocost/iocost.go
@@ -239,10 +239,5 @@ func (b *IOCost) configPodIOCostWeight(podInfo *typedef.PodInfo) error {
if podInfo.Annotations[constant.PriorityAnnotationKey] == "false" {
weight = onlineWeight
}
- for _, container := range podInfo.IDContainersMap {
- if err := ConfigContainerIOCostWeight(container.Path, weight); err != nil {
- return err
- }
- }
- return nil
+ return ConfigPodIOCostWeight(podInfo.Path, weight)
}
diff --git a/pkg/services/iocost/iocost_origin.go b/pkg/services/iocost/iocost_origin.go
index d37109f..5e9948f 100644
--- a/pkg/services/iocost/iocost_origin.go
+++ b/pkg/services/iocost/iocost_origin.go
@@ -63,14 +63,14 @@ func ConfigIOCostModel(devno string, p interface{}) error {
return cgroup.WriteCgroupFile(paramStr, blkcgRootDir, iocostModelFile)
}
-// ConfigContainerIOCostWeight for config iocost weight
+// ConfigPodIOCostWeight for config iocost weight
// cgroup v1 iocost cannot be inherited. Therefore, only the container level can be configured.
-func ConfigContainerIOCostWeight(containerRelativePath string, weight uint64) error {
+func ConfigPodIOCostWeight(relativePath string, weight uint64) error {
if err := cgroup.WriteCgroupFile(strconv.FormatUint(weight, scale), blkcgRootDir,
- containerRelativePath, iocostWeightFile); err != nil {
+ relativePath, iocostWeightFile); err != nil {
return err
}
- if err := bindMemcgBlkcg(containerRelativePath); err != nil {
+ if err := bindMemcgBlkcg(relativePath); err != nil {
return err
}
return nil
diff --git a/pkg/services/iocost/iocost_test.go b/pkg/services/iocost/iocost_test.go
index 95b6d97..3bdadad 100644
--- a/pkg/services/iocost/iocost_test.go
+++ b/pkg/services/iocost/iocost_test.go
@@ -334,7 +334,7 @@ func TestSetPodWeight(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- err := ConfigContainerIOCostWeight(tt.cgroupPath, uint64(tt.weight))
+ err := ConfigPodIOCostWeight(tt.cgroupPath, uint64(tt.weight))
if tt.wantErr {
assert.Contains(t, err.Error(), tt.errMsg)
return
--
2.32.1 (Apple Git-133)

View File

@ -0,0 +1,264 @@
From e7c89f1935d117a2778339cc4774393331002254 Mon Sep 17 00:00:00 2001
From: jingxiaolu <lujingxiao@huawei.com>
Date: Mon, 12 Jun 2023 23:12:37 +0800
Subject: [PATCH 3/7] rubik: test coverage for PSI Manager
Adding test cases for PSI Manager
Signed-off-by: jingxiaolu <lujingxiao@huawei.com>
---
Makefile | 3 +
pkg/config/config_test.go | 29 ++++++++
pkg/services/psi/psi.go | 14 ++--
pkg/services/psi/psi_test.go | 126 +++++++++++++++++++++++++++++++++++
pkg/services/service_test.go | 4 ++
5 files changed, 169 insertions(+), 7 deletions(-)
create mode 100644 pkg/services/psi/psi_test.go
diff --git a/Makefile b/Makefile
index 7a92d12..bd66147 100644
--- a/Makefile
+++ b/Makefile
@@ -54,6 +54,7 @@ help:
@echo "make test-unit # run unit test"
@echo "make cover # generate coverage report"
@echo "make install # install files to /var/lib/rubik"
+ @echo "make clean # clean built files and test logs"
prepare:
mkdir -p $(TMP_DIR) $(BUILD_DIR)
@@ -101,3 +102,5 @@ install:
cp -f $(BUILD_DIR)/* $(INSTALL_DIR)
cp -f $(BUILD_DIR)/rubik.service /lib/systemd/system/
+clean:
+ rm -rf build/* cover.* unit_test_log
diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go
index dbbd2e4..03ff4ca 100644
--- a/pkg/config/config_test.go
+++ b/pkg/config/config_test.go
@@ -53,6 +53,35 @@ var rubikConfig string = `
"mid": 30,
"high": 50
}
+ },
+ "ioCost": [
+ {
+ "nodeName": "k8s-single",
+ "config": [
+ {
+ "dev": "sdb",
+ "enable": true,
+ "model": "linear",
+ "param": {
+ "rbps": 10000000,
+ "rseqiops": 10000000,
+ "rrandiops": 10000000,
+ "wbps": 10000000,
+ "wseqiops": 10000000,
+ "wrandiops": 10000000
+ }
+ }
+ ]
+ }
+ ],
+ "psi": {
+ "interval": 10,
+ "resource": [
+ "cpu",
+ "memory",
+ "io"
+ ],
+ "avg10Threshold": 5.0
}
}
`
diff --git a/pkg/services/psi/psi.go b/pkg/services/psi/psi.go
index 1c70255..a55922e 100644
--- a/pkg/services/psi/psi.go
+++ b/pkg/services/psi/psi.go
@@ -37,19 +37,19 @@ const (
minThreshold float64 = 5.0
)
-// Factory is the QuotaTurbo factory class
+// Factory is the PSI Manager factory class
type Factory struct {
ObjName string
}
// Name returns the factory class name
-func (i Factory) Name() string {
- return "Factory"
+func (f Factory) Name() string {
+ return "PSIFactory"
}
-// NewObj returns a QuotaTurbo object
-func (i Factory) NewObj() (interface{}, error) {
- return NewManager(i.ObjName), nil
+// NewObj returns a Manager object
+func (f Factory) NewObj() (interface{}, error) {
+ return NewManager(f.ObjName), nil
}
// Config is PSI service configuration
@@ -130,7 +130,7 @@ func (m *Manager) SetConfig(f helper.ConfigHandler) error {
}
// IsRunner returns true that tells other Manager is a persistent service
-func (qt *Manager) IsRunner() bool {
+func (m *Manager) IsRunner() bool {
return true
}
diff --git a/pkg/services/psi/psi_test.go b/pkg/services/psi/psi_test.go
new file mode 100644
index 0000000..2036aa1
--- /dev/null
+++ b/pkg/services/psi/psi_test.go
@@ -0,0 +1,126 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Jingxiao Lu
+// Date: 2023-06-12
+// Description: This file is used for testing psi.go
+
+package psi
+
+import (
+ "context"
+ "fmt"
+ "testing"
+ "time"
+
+ "isula.org/rubik/pkg/api"
+ "isula.org/rubik/pkg/core/typedef"
+)
+
+// TestNewManagerObj tests NewObj() for Factory
+func TestNewManagerObj(t *testing.T) {
+ var fact = Factory{
+ ObjName: "psi",
+ }
+ nm, err := fact.NewObj()
+ if err != nil {
+ t.Fatalf("New PSI Manager failed: %v", err)
+ return
+ }
+ fmt.Printf("New PSI Manager %s is %#v", fact.Name(), nm)
+}
+
+// TestConfigValidate tests Config Validate
+func TestConfigValidate(t *testing.T) {
+ var tests = []struct {
+ name string
+ conf *Config
+ wantErr bool
+ }{
+ {
+ name: "TC1 - Default Config",
+ conf: NewConfig(),
+ wantErr: true,
+ },
+ {
+ name: "TC2 - Wrong Interval value",
+ conf: &Config{
+ Interval: minInterval - 1,
+ },
+ wantErr: true,
+ },
+ {
+ name: "TC3 - Wrong Threshold value",
+ conf: &Config{
+ Interval: minInterval,
+ Avg10Threshold: minThreshold - 1,
+ },
+ wantErr: true,
+ },
+ {
+ name: "TC4 - No resource type specified",
+ conf: &Config{
+ Interval: minInterval,
+ Avg10Threshold: minThreshold,
+ },
+ wantErr: true,
+ },
+ {
+ name: "TC5 - Wrong resource type cpuacct - cpuacct is for psi subsystem, not for resource type",
+ conf: &Config{
+ Interval: minInterval,
+ Avg10Threshold: minThreshold,
+ Resource: []string{"cpu", "memory", "io", "cpuacct"},
+ },
+ wantErr: true,
+ },
+ {
+ name: "TC6 - Success case - trully end",
+ conf: &Config{
+ Interval: minInterval,
+ Avg10Threshold: minThreshold,
+ Resource: []string{"cpu", "memory", "io"},
+ },
+ wantErr: false,
+ },
+ }
+ for _, tc := range tests {
+ t.Run(tc.name, func(t *testing.T) {
+ if err := tc.conf.Validate(); (err != nil) != tc.wantErr {
+ t.Errorf("Config.Validate() error = %v, wantErr %v", err, tc.wantErr)
+ }
+ })
+ }
+}
+
+type FakeManager struct{}
+
+func (m *FakeManager) ListContainersWithOptions(options ...api.ListOption) map[string]*typedef.ContainerInfo {
+ return make(map[string]*typedef.ContainerInfo)
+}
+func (m *FakeManager) ListPodsWithOptions(options ...api.ListOption) map[string]*typedef.PodInfo {
+ return make(map[string]*typedef.PodInfo, 1)
+}
+
+// TestManagerRun creates a fake manager and runs it
+func TestManagerRun(t *testing.T) {
+ nm := NewManager("psi")
+ nm.conf.Interval = 1
+ nm.PreStart(&FakeManager{})
+ nm.SetConfig(func(configName string, d interface{}) error { return nil })
+ if !nm.IsRunner() {
+ t.Fatalf("FakeManager is not a runner!")
+ return
+ }
+
+ ctx, cancel := context.WithCancel(context.Background())
+ go nm.Run(ctx)
+ time.Sleep(time.Second)
+ cancel()
+}
diff --git a/pkg/services/service_test.go b/pkg/services/service_test.go
index a6e0298..537d0b3 100644
--- a/pkg/services/service_test.go
+++ b/pkg/services/service_test.go
@@ -36,6 +36,10 @@ var defaultFeature = []FeatureSpec{
Name: feature.QuotaTurboFeature,
Default: true,
},
+ {
+ Name: feature.PSIFeature,
+ Default: true,
+ },
}
func TestErrorInitServiceComponents(t *testing.T) {
--
2.32.1 (Apple Git-133)

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,83 @@
From c74d87538bc27a20c24d27319c5d60970ab3ccab Mon Sep 17 00:00:00 2001
From: vegbir <yangjiaqi16@huawei.com>
Date: Wed, 14 Jun 2023 17:14:16 +0800
Subject: [PATCH 5/7] rubik: move fssr design document to design dir
Signed-off-by: vegbir <yangjiaqi16@huawei.com>
---
docs/{ => design}/fssr.md | 18 ++++++++++++++----
.../fssr/flowchart.png} | Bin
.../fssr/sequence_diagram.png} | Bin
3 files changed, 14 insertions(+), 4 deletions(-)
rename docs/{ => design}/fssr.md (90%)
rename docs/{png/rubik_fssr_2.png => images/fssr/flowchart.png} (100%)
rename docs/{png/rubik_fssr_1.png => images/fssr/sequence_diagram.png} (100%)
diff --git a/docs/fssr.md b/docs/design/fssr.md
similarity index 90%
rename from docs/fssr.md
rename to docs/design/fssr.md
index 3fb36bd..184b364 100644
--- a/docs/fssr.md
+++ b/docs/design/fssr.md
@@ -1,20 +1,27 @@
+# 【需求设计】异步内存分级回收 fssr策略
+
## 方案目标
+
在混部集群中在线和离线业务被同时部署到同一物理资源节点同时离线业务是内存资源消耗型在线业务有波峰波谷在离线业务之间内存资源竞争导致在线业务受影响。该方案目标在充分利用内存资源的同时保证在线QoS。
## 总体设计
+
各个模块之间的联系如下:
-![](png/rubik_fssr_1.png)
+![sequence_diagram](../images/fssr/sequence_diagram.png)
- 用户部署rubikrubik向k8s注册监听pod事件。
- 当离线业务被部署时k8s会通知rubikrubik向该离线pod配置memory.high。
- 同时rubik实时监控当前节点的内存使用量使用fssr策略向pod配置memory.high。
### 依赖说明
+
内核需要支持memcg级内存水位线方案即提供`memory.high`和`memory.high_async_ratio`。
### 详细设计
+
内存分级方案中rubik新增FSSR内存处理模块该模块主要处理获取主机节点的总内存(total memory)、预留内存(reserved memory)、剩余内存(free memory)。并根据FSSR算法设置离线内存的memory.high。具体策略如下
-![](png/rubik_fssr_2.png)
+![flowchart](../images/fssr/flowchart.png)
+
- rubik启动时计算预留内存默认为总内存的10%如果总内存的10%超过10G则为10G
- 配置离线容器的cgroup级别水位线内核提供`memory.high`和`memory.high_async_ratio`两个接口分别配置cgroup的软上限和警戒水位线。启动rubik时默认配置`memory.high`为`total_memory`(总内存)`*`80%
- 获取剩余内存free_memory
@@ -22,13 +29,16 @@
- 持续一分钟free_memory>2`*`reserved_memory时提高离线的memory.high每次提升总内存的1%total_memory`*`1%
说明:
+
1. 离线应用memory.high的范围为`[total_memory*30%, total_memory*80%]`
### 配置说明
-```
+
+```json
"dynMemory": {
"policy": "fssr"
}
```
+
- dynMemory表示动态内存
-- policy目前只支持fssr
\ No newline at end of file
+- policy目前只支持fssr
diff --git a/docs/png/rubik_fssr_2.png b/docs/images/fssr/flowchart.png
similarity index 100%
rename from docs/png/rubik_fssr_2.png
rename to docs/images/fssr/flowchart.png
diff --git a/docs/png/rubik_fssr_1.png b/docs/images/fssr/sequence_diagram.png
similarity index 100%
rename from docs/png/rubik_fssr_1.png
rename to docs/images/fssr/sequence_diagram.png
--
2.32.1 (Apple Git-133)

View File

@ -0,0 +1,246 @@
From 526bd12a3b77135ce5f112f3195f1e7d41d965d5 Mon Sep 17 00:00:00 2001
From: hanchao <hanchao63@huawei.com>
Date: Fri, 16 Jun 2023 13:10:42 +0800
Subject: [PATCH 6/7] rubik: fix that value of memory.high_async_ratio lost
efficacy
---
pkg/services/dynmemory/dynmemory.go | 22 +++++++-
pkg/services/dynmemory/fssr.go | 78 +++++++++++++++++------------
2 files changed, 67 insertions(+), 33 deletions(-)
diff --git a/pkg/services/dynmemory/dynmemory.go b/pkg/services/dynmemory/dynmemory.go
index da859dd..b73f476 100644
--- a/pkg/services/dynmemory/dynmemory.go
+++ b/pkg/services/dynmemory/dynmemory.go
@@ -6,6 +6,7 @@ import (
"time"
"isula.org/rubik/pkg/api"
+ "isula.org/rubik/pkg/core/typedef"
"isula.org/rubik/pkg/services/helper"
"k8s.io/apimachinery/pkg/util/wait"
)
@@ -15,6 +16,7 @@ type DynMemoryAdapter interface {
preStart(api.Viewer) error
getInterval() int
dynamicAdjust()
+ setOfflinePod(path string) error
}
type dynMemoryConfig struct {
Policy string `json:"policy,omitempty"`
@@ -42,11 +44,11 @@ type DynMemory struct {
}
// PreStart is an interface for calling a collection of methods when the service is pre-started
-func (dynMem *DynMemory) PreStart(api api.Viewer) error {
+func (dynMem *DynMemory) PreStart(viewer api.Viewer) error {
if dynMem.dynMemoryAdapter == nil {
return nil
}
- return dynMem.dynMemoryAdapter.preStart(api)
+ return dynMem.dynMemoryAdapter.preStart(viewer)
}
// SetConfig is an interface that invoke the ConfigHandler to obtain the corresponding configuration.
@@ -81,6 +83,22 @@ func (dynMem *DynMemory) IsRunner() bool {
return true
}
+// AddPod handles the event of a pod being added.
+func (dynMem *DynMemory) AddPod(podInfo *typedef.PodInfo) error {
+ if podInfo.Offline() {
+ return dynMem.dynMemoryAdapter.setOfflinePod(podInfo.Path)
+ }
+ return nil
+}
+
+// UpdatePod handles the event of a pod being updated.
+func (dynMem *DynMemory) UpdatePod(old, new *typedef.PodInfo) error {
+ if new.Offline() {
+ return dynMem.dynMemoryAdapter.setOfflinePod(new.Path)
+ }
+ return nil
+}
+
// newAdapter to create adapter of dyn memory.
func newAdapter(policy string) DynMemoryAdapter {
switch policy {
diff --git a/pkg/services/dynmemory/fssr.go b/pkg/services/dynmemory/fssr.go
index 9fe4042..e23a4bc 100644
--- a/pkg/services/dynmemory/fssr.go
+++ b/pkg/services/dynmemory/fssr.go
@@ -9,6 +9,7 @@ import (
"isula.org/rubik/pkg/api"
"isula.org/rubik/pkg/common/constant"
+ "isula.org/rubik/pkg/common/log"
"isula.org/rubik/pkg/common/util"
"isula.org/rubik/pkg/core/typedef"
"isula.org/rubik/pkg/core/typedef/cgroup"
@@ -30,71 +31,76 @@ type fssrDynMemAdapter struct {
memTotal int64
memHigh int64
reservedMem int64
- api api.Viewer
count int64
+ viewer api.Viewer
}
-// initFssrDynMemAdapter function
+// initFssrDynMemAdapter initializes a new fssrDynMemAdapter struct.
func initFssrDynMemAdapter() *fssrDynMemAdapter {
if total, err := getFieldMemory("MemTotal"); err == nil && total > 0 {
return &fssrDynMemAdapter{
memTotal: total,
memHigh: total * 8 / 10,
- reservedMem: total * 8 / 10,
+ reservedMem: total * 1 / 10,
+ count: 0,
}
}
return nil
}
-// preStart function
-func (f *fssrDynMemAdapter) preStart(api api.Viewer) error {
- f.api = api
+// preStart initializes the fssrDynMemAdapter with the provided viewer and
+// deals with any existing pods.
+func (f *fssrDynMemAdapter) preStart(viewer api.Viewer) error {
+ f.viewer = viewer
return f.dealExistedPods()
}
-// getInterval function
+// getInterval returns the fssrInterval value.
func (f *fssrDynMemAdapter) getInterval() int {
return fssrInterval
}
-// dynadjust function
+// dynamicAdjust raises or lowers the memHigh value that fssrDynMemAdapter
+// applies to offline pods (their memory.high), based on the current
+// amount of free memory available on the system.
func (f *fssrDynMemAdapter) dynamicAdjust() {
var freeMem int64
var err error
if freeMem, err = getFieldMemory("MemFree"); err != nil {
return
}
+
+ memHigh := f.memHigh // keep the current limit when free memory lies between the two thresholds
if freeMem > 2*f.reservedMem {
if f.count < fssrIntervalCount {
f.count++
return
}
- memHigh := f.memHigh + f.memTotal/100
+ // no risk of overflow
+ memHigh = f.memHigh + f.memTotal/100
if memHigh > f.memTotal*8/10 {
memHigh = f.memTotal * 8 / 10
}
- if memHigh != f.memHigh {
- f.memHigh = memHigh
- f.adjustOfflinePodHighMemory()
- }
} else if freeMem < f.reservedMem {
- memHigh := f.memHigh - f.memTotal/10
+ memHigh = f.memHigh - f.memTotal/10
if memHigh < 0 {
return
}
if memHigh < f.memTotal*3/10 {
memHigh = f.memTotal * 3 / 10
}
- if memHigh != f.memHigh {
- f.memHigh = memHigh
- f.adjustOfflinePodHighMemory()
- }
}
+ if memHigh != f.memHigh {
+ f.memHigh = memHigh
+ f.adjustOfflinePodHighMemory()
+ }
+
f.count = 0
}
+// adjustOfflinePodHighMemory adjusts the memory.high of offline pods.
func (f *fssrDynMemAdapter) adjustOfflinePodHighMemory() error {
- pods := listOfflinePods(f.api)
+ pods := listOfflinePods(f.viewer)
for _, podInfo := range pods {
if err := setOfflinePodHighMemory(podInfo.Path, f.memHigh); err != nil {
return err
@@ -103,20 +109,18 @@ func (f *fssrDynMemAdapter) adjustOfflinePodHighMemory() error {
return nil
}
-// dealExistedPods function
+// dealExistedPods handles offline pods by setting their memory.high and memory.high_async_ratio
func (f *fssrDynMemAdapter) dealExistedPods() error {
- pods := listOfflinePods(f.api)
+ pods := listOfflinePods(f.viewer)
for _, podInfo := range pods {
- if err := setOfflinePodHighMemory(podInfo.Path, f.memHigh); err != nil {
- return err
- }
- if err := setOfflinePodHighAsyncRatio(podInfo.Path, highRatio); err != nil {
- return err
+ if err := f.setOfflinePod(podInfo.Path); err != nil {
+ log.Errorf("set fssr of offline pod[%v] error:%v", podInfo.UID, err)
}
}
return nil
}
+// listOfflinePods returns a map of offline PodInfo objects.
func listOfflinePods(viewer api.Viewer) map[string]*typedef.PodInfo {
offlineValue := "true"
return viewer.ListPodsWithOptions(func(pi *typedef.PodInfo) bool {
@@ -124,23 +128,35 @@ func listOfflinePods(viewer api.Viewer) map[string]*typedef.PodInfo {
})
}
-func setOfflinePodHighMemory(podPath string, high int64) error {
- if err := cgroup.WriteCgroupFile(strconv.FormatUint(uint64(high), scale), memcgRootDir,
+// setOfflinePod sets the high async ratio and then memory.high (f.memHigh) for the offline pod at path.
+func (f *fssrDynMemAdapter) setOfflinePod(path string) error {
+ if err := setOfflinePodHighAsyncRatio(path, highRatio); err != nil {
+ return err
+ }
+ return setOfflinePodHighMemory(path, f.memHigh)
+}
+
+// setOfflinePodHighMemory sets the high memory limit for the specified pod in the
+// cgroup memory
+func setOfflinePodHighMemory(podPath string, memHigh int64) error {
+ if err := cgroup.WriteCgroupFile(strconv.FormatUint(uint64(memHigh), scale), memcgRootDir,
podPath, highMemFile); err != nil {
return err
}
return nil
}
-func setOfflinePodHighAsyncRatio(podPath string, ratio uint64) error {
- if err := cgroup.WriteCgroupFile(strconv.FormatUint(ratio, scale), memcgRootDir,
+// setOfflinePodHighAsyncRatio sets the high memory async ratio for a pod in an offline state.
+func setOfflinePodHighAsyncRatio(podPath string, ratio uint) error {
+ if err := cgroup.WriteCgroupFile(strconv.FormatUint(uint64(ratio), scale), memcgRootDir,
podPath, highMemAsyncRatioFile); err != nil {
return err
}
return nil
}
-// getFieldMemory function
+// getFieldMemory retrieves the value of the given field (for example
+// "MemTotal" or "MemFree") from the memory information file.
func getFieldMemory(field string) (int64, error) {
if !util.PathExist(memInfoFile) {
return 0, fmt.Errorf("%v: no such file or diretory", memInfoFile)
--
2.32.1 (Apple Git-133)

View File

@ -0,0 +1,127 @@
From 6b9d862857a1b302b26d8d51e7df5fed3062ba94 Mon Sep 17 00:00:00 2001
From: vegbir <yangjiaqi16@huawei.com>
Date: Mon, 19 Jun 2023 11:47:53 +0800
Subject: [PATCH 7/7] bugfix: fix typos & calling order of waitgroup
Signed-off-by: vegbir <yangjiaqi16@huawei.com>
---
pkg/core/trigger/base.go | 4 ++--
pkg/core/trigger/expulsion.go | 2 +-
pkg/core/trigger/resourceanalysis.go | 6 +++---
pkg/rubik/servicemanager.go | 6 +++---
4 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/pkg/core/trigger/base.go b/pkg/core/trigger/base.go
index 7f1fbe9..c212f66 100644
--- a/pkg/core/trigger/base.go
+++ b/pkg/core/trigger/base.go
@@ -70,8 +70,8 @@ type TreeTrigger struct {
subTriggers []Trigger
}
-// withTreeTirgger returns a BaseMetric object
-func withTreeTirgger(name string, exec Executor) *TreeTrigger {
+// withTreeTrigger returns a TreeTrigger object
+func withTreeTrigger(name string, exec Executor) *TreeTrigger {
return &TreeTrigger{
name: name,
exec: exec,
diff --git a/pkg/core/trigger/expulsion.go b/pkg/core/trigger/expulsion.go
index 87dd484..e438d3d 100644
--- a/pkg/core/trigger/expulsion.go
+++ b/pkg/core/trigger/expulsion.go
@@ -41,7 +41,7 @@ var expulsionCreator = func() Trigger {
appendUsedExecutors(ExpulsionAnno, expulsionExec)
}
}
- return withTreeTirgger(ExpulsionAnno, expulsionExec)
+ return withTreeTrigger(ExpulsionAnno, expulsionExec)
}
// Expulsion is the trigger to evict pods
diff --git a/pkg/core/trigger/resourceanalysis.go b/pkg/core/trigger/resourceanalysis.go
index a3d99e5..7e7413e 100644
--- a/pkg/core/trigger/resourceanalysis.go
+++ b/pkg/core/trigger/resourceanalysis.go
@@ -49,7 +49,7 @@ var analyzerCreator = func() Trigger {
appendUsedExecutors(ResourceAnalysisAnno, resourceAnalysisExec)
}
}
- return withTreeTirgger(ResourceAnalysisAnno, resourceAnalysisExec)
+ return withTreeTrigger(ResourceAnalysisAnno, resourceAnalysisExec)
}
// rreqOpt is the option to get information from cadvisor
@@ -161,7 +161,7 @@ func (a *Analyzer) maxCPUUtil(pods map[string]*typedef.PodInfo) *typedef.PodInfo
}
}
if chosen != nil {
- log.Infof("find the max cpu util pod \"%v\": %v", chosen.Name, maxUtil)
+ log.Infof("find the pod(%v) with the highest cpu utilization(%v)", chosen.Name, maxUtil)
}
return chosen
}
@@ -185,7 +185,7 @@ func (a *Analyzer) maxMemoryUtil(pods map[string]*typedef.PodInfo) *typedef.PodI
}
}
if chosen != nil {
- log.Infof("find the max cpu util pod \"%v\": %v", chosen.Name, maxUtil)
+ log.Infof("find the pod(%v) with the highest memory utilization(%v)", chosen.Name, maxUtil)
}
return chosen
}
diff --git a/pkg/rubik/servicemanager.go b/pkg/rubik/servicemanager.go
index 3e162b6..c3b252a 100644
--- a/pkg/rubik/servicemanager.go
+++ b/pkg/rubik/servicemanager.go
@@ -218,7 +218,6 @@ func (manager *ServiceManager) addFunc(event typedef.Event) {
const retryCount = 5
addOnce := func(s services.Service, podInfo *typedef.PodInfo, wg *sync.WaitGroup) {
- wg.Add(1)
for i := 0; i < retryCount; i++ {
if err := s.AddPod(podInfo); err != nil {
log.Errorf("service %s add func failed: %v", s.ID(), err)
@@ -231,6 +230,7 @@ func (manager *ServiceManager) addFunc(event typedef.Event) {
manager.RLock()
var wg sync.WaitGroup
for _, s := range manager.RunningServices {
+ wg.Add(1)
go addOnce(s, podInfo.DeepCopy(), &wg)
}
wg.Wait()
@@ -250,7 +250,6 @@ func (manager *ServiceManager) updateFunc(event typedef.Event) {
return
}
runOnce := func(s services.Service, old, new *typedef.PodInfo, wg *sync.WaitGroup) {
- wg.Add(1)
log.Debugf("update Func with service: %s", s.ID())
if err := s.UpdatePod(old, new); err != nil {
log.Errorf("service %s update func failed: %v", s.ID(), err)
@@ -260,6 +259,7 @@ func (manager *ServiceManager) updateFunc(event typedef.Event) {
manager.RLock()
var wg sync.WaitGroup
for _, s := range manager.RunningServices {
+ wg.Add(1)
go runOnce(s, podInfos[0], podInfos[1], &wg)
}
wg.Wait()
@@ -275,7 +275,6 @@ func (manager *ServiceManager) deleteFunc(event typedef.Event) {
}
deleteOnce := func(s services.Service, podInfo *typedef.PodInfo, wg *sync.WaitGroup) {
- wg.Add(1)
if err := s.DeletePod(podInfo); err != nil {
log.Errorf("service %s delete func failed: %v", s.ID(), err)
}
@@ -284,6 +283,7 @@ func (manager *ServiceManager) deleteFunc(event typedef.Event) {
manager.RLock()
var wg sync.WaitGroup
for _, s := range manager.RunningServices {
+ wg.Add(1)
go deleteOnce(s, podInfo.DeepCopy(), &wg)
}
wg.Wait()
--
2.32.1 (Apple Git-133)

View File

@ -1,6 +1,6 @@
Name: rubik
Version: 2.0.0
Release: 1
Release: 2
Summary: Hybrid Deployment for Cloud Native
License: Mulan PSL V2
URL: https://gitee.com/openeuler/rubik
@ -56,6 +56,12 @@ install -Dp ./build_rubik_image.sh %{buildroot}%{_sharedstatedir}/%{name}/build_
rm -rf %{buildroot}
%changelog
* Mon Jun 19 2023 vegbir <yangjiaqi16@huawei.com> - 2.0.0-2
- Type:bugfix
- CVE:NA
- SUG:restart
- DESC:sync upstream patches
* Wed May 24 2023 vegbir <yangjiaqi16@huawei.com> - 2.0.0-1
- Type:bugfix
- CVE:NA

View File

@ -1 +1,8 @@
patch/0001-Support-Labels-field-to-configure-QoSLevel.patch
patch/0002-rubik-fix-weight-for-iocost-does-not-take-effect.patch
patch/0003-rubik-test-coverage-for-PSI-Manager.patch
patch/0004-rubik-add-psi-design-documentation.patch
patch/0005-rubik-move-fssr-design-document-to-design-dir.patch
patch/0006-rubik-fix-that-value-of-memory.high_async_ratio-lost.patch
patch/0007-bugfix-fix-typos-calling-order-of-waitgroup.patch
#end of file