Compare commits
10 commits: cfc17e70fe ... ef6e8fd904
| Author | SHA1 | Date |
|---|---|---|
| | ef6e8fd904 | |
| | e67576189c | |
| | ea58313de4 | |
| | 11dfe10561 | |
| | 036fb7ab80 | |
| | a1d5d5f63d | |
| | 0f26cb4552 | |
| | 3f2f9db304 | |
| | ac1646b9b8 | |
| | 7c5f6296e6 | |
@@ -1 +1 @@
-2.0.0-3
+2.0.1-2
@@ -1 +1 @@
-fdaef8a93069235bac826890a028f8fa7e7d1620
+4a7405a1676d1c80e9938f3e1328cbee82e1a65c
@@ -1,188 +0,0 @@
From b8e000527f7129242bd71f9c79697bef3a8b2111 Mon Sep 17 00:00:00 2001
From: wujing <wujing50@huawei.com>
Date: Wed, 10 May 2023 19:26:36 +0800
Subject: [PATCH 1/7] Support Labels field to configure QoSLevel

Signed-off-by: wujing <wujing50@huawei.com>
---
 pkg/core/typedef/podinfo.go | 39 +++++++++++++++++++++++++--
 pkg/services/dyncache/dynamic.go | 3 +--
 pkg/services/dyncache/sync.go | 3 +--
 pkg/services/iocost/iocost.go | 2 +-
 pkg/services/preemption/preemption.go | 13 +++------
 tests/try/pod.go | 1 +
 6 files changed, 44 insertions(+), 17 deletions(-)

diff --git a/pkg/core/typedef/podinfo.go b/pkg/core/typedef/podinfo.go
index 907f02b..fd96848 100644
--- a/pkg/core/typedef/podinfo.go
+++ b/pkg/core/typedef/podinfo.go
@@ -15,6 +15,7 @@
 package typedef

 import (
+	"isula.org/rubik/pkg/common/constant"
 	"isula.org/rubik/pkg/core/typedef/cgroup"
 )

@@ -26,6 +27,7 @@ type PodInfo struct {
 	Namespace string `json:"namespace"`
 	IDContainersMap map[string]*ContainerInfo `json:"containers,omitempty"`
 	Annotations map[string]string `json:"annotations,omitempty"`
+	Labels map[string]string `json:"labels,omitempty"`
 }

 // NewPodInfo creates the PodInfo instance
@@ -37,6 +39,7 @@ func NewPodInfo(pod *RawPod) *PodInfo {
 		Hierarchy: cgroup.Hierarchy{Path: pod.CgroupPath()},
 		IDContainersMap: pod.ExtractContainerInfos(),
 		Annotations: pod.DeepCopy().Annotations,
+		Labels: pod.DeepCopy().Labels,
 	}
 }

@@ -46,8 +49,9 @@ func (pod *PodInfo) DeepCopy() *PodInfo {
 		return nil
 	}
 	var (
-		contMap map[string]*ContainerInfo
-		annoMap map[string]string
+		contMap map[string]*ContainerInfo
+		annoMap map[string]string
+		labelMap map[string]string
 	)
 	// nil is different from empty value in golang
 	if pod.IDContainersMap != nil {
@@ -56,6 +60,7 @@ func (pod *PodInfo) DeepCopy() *PodInfo {
 			contMap[id] = cont.DeepCopy()
 		}
 	}
+
 	if pod.Annotations != nil {
 		annoMap = make(map[string]string)
 		for k, v := range pod.Annotations {
@@ -63,12 +68,42 @@ func (pod *PodInfo) DeepCopy() *PodInfo {
 		}
 	}

+	if pod.Labels != nil {
+		labelMap = make(map[string]string)
+		for k, v := range pod.Labels {
+			labelMap[k] = v
+		}
+	}
+
 	return &PodInfo{
 		Name: pod.Name,
 		UID: pod.UID,
 		Hierarchy: pod.Hierarchy,
 		Namespace: pod.Namespace,
 		Annotations: annoMap,
+		Labels: labelMap,
 		IDContainersMap: contMap,
 	}
 }
+
+// Offline is used to determine whether the pod is offline
+func (pod *PodInfo) Offline() bool {
+	var anno string
+	var label string
+
+	if pod.Annotations != nil {
+		anno = pod.Annotations[constant.PriorityAnnotationKey]
+	}
+
+	if pod.Labels != nil {
+		label = pod.Labels[constant.PriorityAnnotationKey]
+	}
+
+	// Annotations have a higher priority than labels
+	return anno == "true" || label == "true"
+}
+
+// Online is used to determine whether the pod is online
+func (pod *PodInfo) Online() bool {
+	return !pod.Offline()
+}
diff --git a/pkg/services/dyncache/dynamic.go b/pkg/services/dyncache/dynamic.go
index 09bde4c..d74efc7 100644
--- a/pkg/services/dyncache/dynamic.go
+++ b/pkg/services/dyncache/dynamic.go
@@ -124,8 +124,7 @@ func (c *DynCache) doFlush(limitSet *limitSet) error {
 }

 func (c *DynCache) listOnlinePods() map[string]*typedef.PodInfo {
-	onlineValue := "false"
 	return c.Viewer.ListPodsWithOptions(func(pi *typedef.PodInfo) bool {
-		return pi.Annotations[constant.PriorityAnnotationKey] == onlineValue
+		return pi.Online()
 	})
 }
diff --git a/pkg/services/dyncache/sync.go b/pkg/services/dyncache/sync.go
index 8307c41..bf59cd4 100644
--- a/pkg/services/dyncache/sync.go
+++ b/pkg/services/dyncache/sync.go
@@ -111,8 +111,7 @@ func (c *DynCache) syncLevel(pod *typedef.PodInfo) error {
 }

 func (c *DynCache) listOfflinePods() map[string]*typedef.PodInfo {
-	offlineValue := "true"
 	return c.Viewer.ListPodsWithOptions(func(pi *typedef.PodInfo) bool {
-		return pi.Annotations[constant.PriorityAnnotationKey] == offlineValue
+		return pi.Offline()
 	})
 }
diff --git a/pkg/services/iocost/iocost.go b/pkg/services/iocost/iocost.go
index e5298b1..c11ef60 100644
--- a/pkg/services/iocost/iocost.go
+++ b/pkg/services/iocost/iocost.go
@@ -236,7 +236,7 @@ func (b *IOCost) clearIOCost() error {

 func (b *IOCost) configPodIOCostWeight(podInfo *typedef.PodInfo) error {
 	var weight uint64 = offlineWeight
-	if podInfo.Annotations[constant.PriorityAnnotationKey] == "false" {
+	if podInfo.Online() {
 		weight = onlineWeight
 	}
 	for _, container := range podInfo.IDContainersMap {
diff --git a/pkg/services/preemption/preemption.go b/pkg/services/preemption/preemption.go
index ce436a3..28ec36e 100644
--- a/pkg/services/preemption/preemption.go
+++ b/pkg/services/preemption/preemption.go
@@ -160,18 +160,11 @@ func getQoSLevel(pod *typedef.PodInfo) int {
 	if pod == nil {
 		return constant.Online
 	}
-	anno, ok := pod.Annotations[constant.PriorityAnnotationKey]
-	if !ok {
-		return constant.Online
-	}
-	switch anno {
-	case "true":
+	if pod.Offline() {
 		return constant.Offline
-	case "false":
-		return constant.Online
-	default:
-		return constant.Online
 	}
+
+	return constant.Online
 }

 // Validate will validate the qos service config
diff --git a/tests/try/pod.go b/tests/try/pod.go
index 18cb0ec..8053c4b 100644
--- a/tests/try/pod.go
+++ b/tests/try/pod.go
@@ -60,6 +60,7 @@ func GenFakePodInfo(qosClass corev1.PodQOSClass) *typedef.PodInfo {
 		UID: constant.PodCgroupNamePrefix + podID,
 		Hierarchy: cgroup.Hierarchy{Path: genRelativeCgroupPath(qosClass, podID)},
 		Annotations: make(map[string]string, 0),
+		Labels: make(map[string]string, 0),
 	}
 	return fakePod
 }
--
2.32.1 (Apple Git-133)
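Editor's note: a minimal, self-contained sketch of the offline check this patch introduces. The key name below is an assumption for illustration only — the real value is rubik's constant.PriorityAnnotationKey, which this diff does not show.

```go
package main

import "fmt"

// priorityKey stands in for constant.PriorityAnnotationKey; the actual
// key string lives in rubik's constant package and is assumed here.
const priorityKey = "volcano.sh/preemptable"

type podMeta struct {
	annotations map[string]string
	labels      map[string]string
}

// offline mirrors the Offline() helper added above: a pod counts as
// offline when either the annotation or the label carries "true".
func (p podMeta) offline() bool {
	return p.annotations[priorityKey] == "true" || p.labels[priorityKey] == "true"
}

func main() {
	byLabel := podMeta{labels: map[string]string{priorityKey: "true"}}
	byAnno := podMeta{annotations: map[string]string{priorityKey: "true"}}
	fmt.Println(byLabel.offline(), byAnno.offline()) // true true
}
```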
patch/0001-rubik-remove-duplicate-log.patch
331 lines · Normal file
@@ -0,0 +1,331 @@
From 15aafa185d19c34d02c6e7a459d997f3751b410d Mon Sep 17 00:00:00 2001
From: vegbir <yangjiaqi16@huawei.com>
Date: Thu, 21 Nov 2024 15:02:45 +0000
Subject: [PATCH] rubik: remove duplicate log

Signed-off-by: vegbir <yangjiaqi16@huawei.com>
---
 pkg/core/trigger/common/type.go | 1 -
 pkg/core/trigger/executor/resource.go | 12 ++++++------
 pkg/core/trigger/template/base.go | 8 --------
 pkg/core/typedef/containerinfo.go | 11 +++++------
 pkg/core/typedef/rawpod.go | 14 ++++++--------
 pkg/resource/analyze/analyzer.go | 9 +++++----
 pkg/services/eviction/common/manager.go | 15 +++++++++++----
 pkg/services/eviction/cpu/cpu.go | 19 ++++++++++---------
 pkg/services/eviction/memory/memory.go | 13 +++++++------
 9 files changed, 50 insertions(+), 52 deletions(-)

diff --git a/pkg/core/trigger/common/type.go b/pkg/core/trigger/common/type.go
index 901ca0d..5bfa5eb 100644
--- a/pkg/core/trigger/common/type.go
+++ b/pkg/core/trigger/common/type.go
@@ -22,7 +22,6 @@ type (

 const (
 	TARGETPODS Factor = iota
-	DEPORTPOD
 )

 // Descriptor defines methods for describing triggers
diff --git a/pkg/core/trigger/executor/resource.go b/pkg/core/trigger/executor/resource.go
index 46731dd..2f28271 100644
--- a/pkg/core/trigger/executor/resource.go
+++ b/pkg/core/trigger/executor/resource.go
@@ -36,7 +36,7 @@ func MaxValueTransformer(cal analyze.Calculator) template.Transformation {

 	pods, ok := ctx.Value(common.TARGETPODS).(map[string]*typedef.PodInfo)
 	if !ok {
-		return ctx, fmt.Errorf("failed to get target pods")
+		return ctx, fmt.Errorf("invalid target pod type")
 	}

 	for _, pod := range pods {
@@ -54,12 +54,12 @@ func MaxValueTransformer(cal analyze.Calculator) template.Transformation {
 			chosen = pod
 		}
 	}
-
-	if chosen != nil {
-		log.Infof("find the pod(%v) with the highest utilization(%v)", chosen.Name, maxValue)
-		return context.WithValue(ctx, common.TARGETPODS, map[string]*typedef.PodInfo{chosen.Name: chosen}), nil
+	// If the object is successfully obtained, the object is returned, otherwise an empty object is returned
+	if chosen == nil {
+		return context.WithValue(ctx, common.TARGETPODS, map[string]*typedef.PodInfo{}), nil
 	}
-	return context.Background(), fmt.Errorf("failed to find target pod")
+	log.Infof("find the pod(%v) with the highest utilization(%.2f%%)", chosen.Name, maxValue)
+	return context.WithValue(ctx, common.TARGETPODS, map[string]*typedef.PodInfo{chosen.Name: chosen}), nil
 	}
 }

diff --git a/pkg/core/trigger/template/base.go b/pkg/core/trigger/template/base.go
index f627884..9ad61de 100644
--- a/pkg/core/trigger/template/base.go
+++ b/pkg/core/trigger/template/base.go
@@ -55,10 +55,6 @@ func transform(ctx context.Context, f Transformation) (context.Context, error) {
 	if f == nil {
 		return nil, fmt.Errorf("podFilter method is not implemented")
 	}
-	// pods, ok := ctx.Value(common.TARGETPODS).(map[string]*typedef.PodInfo)
-	// if !ok {
-	//	return ctx, fmt.Errorf("failed to get target pods")
-	// }
 	ctx, err := f(ctx)
 	if err != nil {
 		return ctx, fmt.Errorf("failed to transform pod: %v", err)
@@ -70,10 +66,6 @@ func act(ctx context.Context, f Action) error {
 	if f == nil {
 		return fmt.Errorf("podAction method is not implemented")
 	}
-	// pods, ok := ctx.Value(common.TARGETPODS).(map[string]*typedef.PodInfo)
-	// if !ok {
-	//	return nil
-	// }
 	return f(ctx)
 }

diff --git a/pkg/core/typedef/containerinfo.go b/pkg/core/typedef/containerinfo.go
index 3c61f08..ee5686b 100644
--- a/pkg/core/typedef/containerinfo.go
+++ b/pkg/core/typedef/containerinfo.go
@@ -89,7 +89,7 @@ func NewContainerInfo(opts ...ConfigOpt) *ContainerInfo {
 	}

 	if err := fromRawContainer(ci, conf.rawCont); err != nil {
-		fmt.Printf("failed to parse raw container: %v", err)
+		fmt.Printf("failed to parse raw container: %v\n", err)
 	}
 	fromNRIContainer(ci, conf.nriCont)
 	fromPodCgroupPath(ci, conf.podCgroupPath)
@@ -109,17 +109,16 @@ func fromRawContainer(ci *ContainerInfo, rawCont *RawContainer) error {
 	if rawCont == nil {
 		return nil
 	}
-	requests, limits := rawCont.GetResourceMaps()
 	id, err := rawCont.GetRealContainerID()
 	if err != nil {
 		return fmt.Errorf("failed to parse container ID: %v", err)
 	}
-	if id == "" {
-		return fmt.Errorf("empty container id")
+	// Note that the running pod may have containers that are being deleted or created, and their ids are empty.
+	if id != "" {
+		ci.ID = id
 	}
-
 	ci.Name = rawCont.status.Name
-	ci.ID = id
+	requests, limits := rawCont.GetResourceMaps()
 	ci.RequestResources = requests
 	ci.LimitResources = limits
 	return nil
diff --git a/pkg/core/typedef/rawpod.go b/pkg/core/typedef/rawpod.go
index ed11c6b..45dbd40 100644
--- a/pkg/core/typedef/rawpod.go
+++ b/pkg/core/typedef/rawpod.go
@@ -147,8 +147,8 @@ func (pod *RawPod) ExtractContainerInfos() map[string]*ContainerInfo {
 			WithRawContainer(rawContainer),
 			WithPodCgroup(pod.CgroupPath()),
 		)
+		// The empty ID means that the container is being deleted and no updates are needed.
 		if ci.ID == "" {
-			fmt.Printf("failed to parse id from raw container\n")
 			continue
 		}
 		idContainersMap[ci.ID] = ci
@@ -158,6 +158,10 @@ func (pod *RawPod) ExtractContainerInfos() map[string]*ContainerInfo {

 // GetRealContainerID parses the containerID of k8s
 func (cont *RawContainer) GetRealContainerID() (string, error) {
+	// Empty container ID means the container may be in the creation or deletion phase.
+	if cont.status.ContainerID == "" {
+		return "", nil
+	}
 	/*
 	Note:
 	An UNDEFINED container engine was used when the function was executed for the first time
@@ -175,13 +179,7 @@ func (cont *RawContainer) GetRealContainerID() (string, error) {
 	if !currentContainerEngines.Support(cont) {
 		return "", fmt.Errorf("unsupported container engine: %v", cont.status.ContainerID)
 	}
-
-	cid := cont.status.ContainerID[len(currentContainerEngines.Prefix()):]
-	// the container may be in the creation or deletion phase.
-	if len(cid) == 0 {
-		return "", nil
-	}
-	return cid, nil
+	return cont.status.ContainerID[len(currentContainerEngines.Prefix()):], nil
 }

 // GetResourceMaps returns the number of requests and limits of CPU and memory resources
diff --git a/pkg/resource/analyze/analyzer.go b/pkg/resource/analyze/analyzer.go
index 5c85897..20dd7b0 100644
--- a/pkg/resource/analyze/analyzer.go
+++ b/pkg/resource/analyze/analyzer.go
@@ -14,6 +14,8 @@
 package analyze

 import (
+	"runtime"
+
 	v2 "github.com/google/cadvisor/info/v2"

 	"isula.org/rubik/pkg/common/log"
@@ -53,7 +55,7 @@ func (a *Analyzer) CPUCalculatorBuilder(reqOpt *common.GetOption) Calculator {
 			cpuUsageUs = float64(last.Cpu.Usage.Total-penultimate.Cpu.Usage.Total) / nanoToMicro
 			timeDeltaUs = float64(last.Timestamp.Sub(penultimate.Timestamp).Microseconds())
 		)
-		return util.Div(cpuUsageUs, timeDeltaUs) * percentageRate
+		return util.Div(cpuUsageUs, timeDeltaUs) / float64(runtime.NumCPU()) * percentageRate
 	}
 }

@@ -65,7 +67,6 @@ func (a *Analyzer) MemoryCalculatorBuilder(reqOpt *common.GetOption) Calculator
 		)
 		podStats := a.getPodStats("/"+pi.Path, reqOpt)
 		if len(podStats) < miniNum {
-			log.Errorf("pod %v has no enough memory stats collected, skip it", pi.Name)
 			return -1
 		}
 		return float64(podStats[len(podStats)-1].Memory.Usage) / bytesToMb
@@ -75,12 +76,12 @@ func (a *Analyzer) MemoryCalculatorBuilder(reqOpt *common.GetOption) Calculator
 func (a *Analyzer) getPodStats(cgroupPath string, reqOpt *common.GetOption) []*v2.ContainerStats {
 	infoMap, err := a.GetPodStats(cgroupPath, *reqOpt)
 	if err != nil {
-		log.Errorf("failed to get cgroup information %v: %v", cgroupPath, err)
+		log.Warnf("failed to get cgroup information %v: %v", cgroupPath, err)
 		return nil
 	}
 	info, existed := infoMap[cgroupPath]
 	if !existed {
-		log.Errorf("failed to get cgroup %v from cadvisor", cgroupPath)
+		log.Warnf("failed to get cgroup %v from cadvisor", cgroupPath)
 		return nil
 	}
 	return info.Stats
diff --git a/pkg/services/eviction/common/manager.go b/pkg/services/eviction/common/manager.go
index 4758c72..8a30e4b 100644
--- a/pkg/services/eviction/common/manager.go
+++ b/pkg/services/eviction/common/manager.go
@@ -74,7 +74,7 @@ func newCadvisorManager() (resource.Manager, error) {

 // Controller is a controller for different resources
 type Controller interface {
-	Start(context.Context, func() error)
+	Start(context.Context, func(func() bool) error)
 	Config() interface{}
 }

@@ -213,12 +213,19 @@ func (m *Manager) Terminate(api.Viewer) error {
 	return err
 }

-func (m *Manager) alarm(typ string) func() error {
-	return func() error {
+func (m *Manager) alarm(typ string) func(func() bool) error {
+	return func(needEvcit func() bool) error {
+		pods := m.viewer.ListPodsWithOptions(priority(false))
+		if len(pods) == 0 {
+			return nil
+		}
 		var (
 			errs error
-			ctx = context.WithValue(context.Background(), common.TARGETPODS, m.viewer.ListPodsWithOptions(priority(false)))
+			ctx = context.WithValue(context.Background(), common.TARGETPODS, pods)
 		)
+		if !needEvcit() {
+			return nil
+		}
 		for _, t := range m.baseMetric.Triggers[typ] {
 			errs = util.AppendErr(errs, t.Activate(ctx))
 		}
diff --git a/pkg/services/eviction/cpu/cpu.go b/pkg/services/eviction/cpu/cpu.go
index b11d94b..1cdbe7d 100644
--- a/pkg/services/eviction/cpu/cpu.go
+++ b/pkg/services/eviction/cpu/cpu.go
@@ -62,17 +62,14 @@ func fromConfig(name string, f helper.ConfigHandler) (*Controller, error) {
 }

 // Start loop collects data and performs eviction
-func (c *Controller) Start(ctx context.Context, evictor func() error) {
+func (c *Controller) Start(ctx context.Context, evictor func(func() bool) error) {
 	wait.Until(
 		func() {
 			c.collect()
 			if atomic.LoadInt32(&c.block) == 1 {
 				return
 			}
-			if !c.assertWithinLimit() {
-				return
-			}
-			if err := evictor(); err != nil {
+			if err := evictor(c.assertWithinLimit); err != nil {
 				log.Errorf("failed to execute cpuevict %v", err)
 				return
 			}
@@ -128,16 +125,20 @@ func (c *Controller) averageUsage() float64 {
 	c.RLock()
 	defer c.RUnlock()
 	if len(c.usages) < minUsageLen {
-		log.Infof("failed to get node cpu usage at %v", time.Now().Format(format))
-		return 0
+		log.Debugf("failed to get node cpu usage at %v", time.Now().Format(format))
+		return -1
 	}
 	util := quotaturbo.CalculateUtils(c.usages[0].cpuStats, c.usages[len(c.usages)-1].cpuStats)
-	log.Debugf("get node cpu usage %v at %v", util, time.Now().Format(format))
 	return util
 }

 func (c *Controller) assertWithinLimit() bool {
-	return c.averageUsage() >= float64(c.conf.Threshold)
+	util := c.averageUsage()
+	if util >= float64(c.conf.Threshold) {
+		log.Infof("CPU exceeded: %v%%", util)
+		return true
+	}
+	return false
 }

 // Config returns the configuration
diff --git a/pkg/services/eviction/memory/memory.go b/pkg/services/eviction/memory/memory.go
index 9aa82dc..5215a8a 100644
--- a/pkg/services/eviction/memory/memory.go
+++ b/pkg/services/eviction/memory/memory.go
@@ -51,16 +51,13 @@ func fromConfig(name string, f helper.ConfigHandler) (*Controller, error) {
 }

 // Start loop collects data and performs eviction
-func (c *Controller) Start(ctx context.Context, evictor func() error) {
+func (c *Controller) Start(ctx context.Context, evictor func(func() bool) error) {
 	wait.Until(
 		func() {
 			if atomic.LoadInt32(&c.block) == 1 {
 				return
 			}
-			if !c.assertWithinLimit() {
-				return
-			}
-			if err := evictor(); err != nil {
+			if err := evictor(c.assertWithinLimit); err != nil {
 				log.Errorf("failed to execute memory evict %v", err)
 				return
 			}
@@ -86,7 +83,11 @@ func (c *Controller) assertWithinLimit() bool {
 		log.Errorf("failed to get memory util at %v: %v", time.Now().Format(format), err)
 		return false
 	}
-	return v.UsedPercent >= float64(c.conf.Threshold)
+	if v.UsedPercent >= float64(c.conf.Threshold) {
+		log.Infof("Memory exceeded: %v%%", v.UsedPercent)
+		return true
+	}
+	return false
 }

 // Config returns the configuration
--
2.45.0
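Editor's note: the CPU calculator change above normalizes utilization by the number of CPUs, so a fully busy 4-core node reports 100% rather than 400%. A small sketch of the arithmetic (helper names are mine, not rubik's):

```go
package main

import (
	"fmt"
	"runtime"
)

// cpuUtilPercent mirrors the adjusted formula: cumulative CPU-time delta
// divided by wall-clock delta, normalized by CPU count, as a percentage.
func cpuUtilPercent(cpuDeltaUs, timeDeltaUs float64) float64 {
	if timeDeltaUs == 0 {
		return 0 // rubik's util.Div guards division by zero similarly
	}
	return cpuDeltaUs / timeDeltaUs / float64(runtime.NumCPU()) * 100
}

func main() {
	// 4 CPU-seconds consumed over a 1-second window on this machine:
	fmt.Printf("%.2f%%\n", cpuUtilPercent(4e6, 1e6))
}
```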
@@ -1,69 +0,0 @@
From 804ff7873331cf745bc49aab5f5d2857ec1597c6 Mon Sep 17 00:00:00 2001
From: hanchao <hanchao63@huawei.com>
Date: Mon, 5 Jun 2023 13:56:01 +0800
Subject: [PATCH 2/7] rubik: fix weight for iocost does not take effect

reason: Fix weight for iocost does not take effect.
The iocost weight is at pod level, not container
level.
---
 pkg/services/iocost/iocost.go | 7 +------
 pkg/services/iocost/iocost_origin.go | 8 ++++----
 pkg/services/iocost/iocost_test.go | 2 +-
 3 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/pkg/services/iocost/iocost.go b/pkg/services/iocost/iocost.go
index e5298b1..34f508a 100644
--- a/pkg/services/iocost/iocost.go
+++ b/pkg/services/iocost/iocost.go
@@ -239,10 +239,5 @@ func (b *IOCost) configPodIOCostWeight(podInfo *typedef.PodInfo) error {
 	if podInfo.Annotations[constant.PriorityAnnotationKey] == "false" {
 		weight = onlineWeight
 	}
-	for _, container := range podInfo.IDContainersMap {
-		if err := ConfigContainerIOCostWeight(container.Path, weight); err != nil {
-			return err
-		}
-	}
-	return nil
+	return ConfigPodIOCostWeight(podInfo.Path, weight)
 }
diff --git a/pkg/services/iocost/iocost_origin.go b/pkg/services/iocost/iocost_origin.go
index d37109f..5e9948f 100644
--- a/pkg/services/iocost/iocost_origin.go
+++ b/pkg/services/iocost/iocost_origin.go
@@ -63,14 +63,14 @@ func ConfigIOCostModel(devno string, p interface{}) error {
 	return cgroup.WriteCgroupFile(paramStr, blkcgRootDir, iocostModelFile)
 }

-// ConfigContainerIOCostWeight for config iocost weight
+// ConfigPodIOCostWeight for config iocost weight
 // cgroup v1 iocost cannot be inherited. Therefore, only the container level can be configured.
-func ConfigContainerIOCostWeight(containerRelativePath string, weight uint64) error {
+func ConfigPodIOCostWeight(relativePath string, weight uint64) error {
 	if err := cgroup.WriteCgroupFile(strconv.FormatUint(weight, scale), blkcgRootDir,
-		containerRelativePath, iocostWeightFile); err != nil {
+		relativePath, iocostWeightFile); err != nil {
 		return err
 	}
-	if err := bindMemcgBlkcg(containerRelativePath); err != nil {
+	if err := bindMemcgBlkcg(relativePath); err != nil {
 		return err
 	}
 	return nil
diff --git a/pkg/services/iocost/iocost_test.go b/pkg/services/iocost/iocost_test.go
index 95b6d97..3bdadad 100644
--- a/pkg/services/iocost/iocost_test.go
+++ b/pkg/services/iocost/iocost_test.go
@@ -334,7 +334,7 @@ func TestSetPodWeight(t *testing.T) {

 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			err := ConfigContainerIOCostWeight(tt.cgroupPath, uint64(tt.weight))
+			err := ConfigPodIOCostWeight(tt.cgroupPath, uint64(tt.weight))
 			if tt.wantErr {
 				assert.Contains(t, err.Error(), tt.errMsg)
 				return
--
2.32.1 (Apple Git-133)
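Editor's note: the fix moves the weight write from per-container paths to the pod's cgroup directory. A minimal sketch of what such a cgroup write looks like; the root directory and control-file name below are placeholders, since the real blkcgRootDir and iocostWeightFile constants are not shown in this diff:

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strconv"
)

// writeCgroupFile is a simplified stand-in for cgroup.WriteCgroupFile: it
// joins the cgroup root, a relative path, and a control file, then writes
// the value. All paths below are illustrative assumptions.
func writeCgroupFile(value, root, relPath, file string) error {
	return os.WriteFile(filepath.Join(root, relPath, file), []byte(value), 0o644)
}

func main() {
	// Writing the weight once at the pod directory covers the whole pod,
	// which is the point of the fix above.
	err := writeCgroupFile(strconv.FormatUint(100, 10),
		"/sys/fs/cgroup/blkio", "kubepods/pod1234", "blkio.cost.weight")
	fmt.Println(err) // fails unless run on a host with that cgroup path
}
```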
@@ -1,264 +0,0 @@
From e7c89f1935d117a2778339cc4774393331002254 Mon Sep 17 00:00:00 2001
From: jingxiaolu <lujingxiao@huawei.com>
Date: Mon, 12 Jun 2023 23:12:37 +0800
Subject: [PATCH 3/7] rubik: test coverage for PSI Manager

Adding test cases for PSI Manager

Signed-off-by: jingxiaolu <lujingxiao@huawei.com>
---
 Makefile | 3 +
 pkg/config/config_test.go | 29 ++++++++
 pkg/services/psi/psi.go | 14 ++--
 pkg/services/psi/psi_test.go | 126 +++++++++++++++++++++++++++++++++++
 pkg/services/service_test.go | 4 ++
 5 files changed, 169 insertions(+), 7 deletions(-)
 create mode 100644 pkg/services/psi/psi_test.go

diff --git a/Makefile b/Makefile
index 7a92d12..bd66147 100644
--- a/Makefile
+++ b/Makefile
@@ -54,6 +54,7 @@ help:
 	@echo "make test-unit # run unit test"
 	@echo "make cover # generate coverage report"
 	@echo "make install # install files to /var/lib/rubik"
+	@echo "make clean" # clean built files and test logs

 prepare:
 	mkdir -p $(TMP_DIR) $(BUILD_DIR)
@@ -101,3 +102,5 @@ install:
 	cp -f $(BUILD_DIR)/* $(INSTALL_DIR)
 	cp -f $(BUILD_DIR)/rubik.service /lib/systemd/system/

+clean:
+	rm -rf build/* cover.* unit_test_log
diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go
index dbbd2e4..03ff4ca 100644
--- a/pkg/config/config_test.go
+++ b/pkg/config/config_test.go
@@ -53,6 +53,35 @@ var rubikConfig string = `
 		"mid": 30,
 		"high": 50
 	}
+	},
+	"ioCost": [
+	{
+		"nodeName": "k8s-single",
+		"config": [
+		{
+			"dev": "sdb",
+			"enable": true,
+			"model": "linear",
+			"param": {
+				"rbps": 10000000,
+				"rseqiops": 10000000,
+				"rrandiops": 10000000,
+				"wbps": 10000000,
+				"wseqiops": 10000000,
+				"wrandiops": 10000000
+			}
+		}
+		]
+	}
+	],
+	"psi": {
+		"interval": 10,
+		"resource": [
+			"cpu",
+			"memory",
+			"io"
+		],
+		"avg10Threshold": 5.0
 	}
 }
 `
diff --git a/pkg/services/psi/psi.go b/pkg/services/psi/psi.go
index 1c70255..a55922e 100644
--- a/pkg/services/psi/psi.go
+++ b/pkg/services/psi/psi.go
@@ -37,19 +37,19 @@ const (
 	minThreshold float64 = 5.0
 )

-// Factory is the QuotaTurbo factory class
+// Factory is the PSI Manager factory class
 type Factory struct {
 	ObjName string
 }

 // Name returns the factory class name
-func (i Factory) Name() string {
-	return "Factory"
+func (f Factory) Name() string {
+	return "PSIFactory"
 }

-// NewObj returns a QuotaTurbo object
-func (i Factory) NewObj() (interface{}, error) {
-	return NewManager(i.ObjName), nil
+// NewObj returns a Manager object
+func (f Factory) NewObj() (interface{}, error) {
+	return NewManager(f.ObjName), nil
 }

 // Config is PSI service configuration
@@ -130,7 +130,7 @@ func (m *Manager) SetConfig(f helper.ConfigHandler) error {
 }

 // IsRunner returns true that tells other Manager is a persistent service
-func (qt *Manager) IsRunner() bool {
+func (m *Manager) IsRunner() bool {
 	return true
 }

diff --git a/pkg/services/psi/psi_test.go b/pkg/services/psi/psi_test.go
new file mode 100644
index 0000000..2036aa1
--- /dev/null
+++ b/pkg/services/psi/psi_test.go
@@ -0,0 +1,126 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Jingxiao Lu
+// Date: 2023-06-12
+// Description: This file is used for testing psi.go
+
+package psi
+
+import (
+	"context"
+	"fmt"
+	"testing"
+	"time"
+
+	"isula.org/rubik/pkg/api"
+	"isula.org/rubik/pkg/core/typedef"
+)
+
+// TestNewManagerObj tests NewObj() for Factory
+func TestNewManagerObj(t *testing.T) {
+	var fact = Factory{
+		ObjName: "psi",
+	}
+	nm, err := fact.NewObj()
+	if err != nil {
+		t.Fatalf("New PSI Manager failed: %v", err)
+		return
+	}
+	fmt.Printf("New PSI Manager %s is %#v", fact.Name(), nm)
+}
+
+// TestConfigValidate tests Config Validate
+func TestConfigValidate(t *testing.T) {
+	var tests = []struct {
+		name string
+		conf *Config
+		wantErr bool
+	}{
+		{
+			name: "TC1 - Default Config",
+			conf: NewConfig(),
+			wantErr: true,
+		},
+		{
+			name: "TC2 - Wrong Interval value",
+			conf: &Config{
+				Interval: minInterval - 1,
+			},
+			wantErr: true,
+		},
+		{
+			name: "TC3 - Wrong Threshold value",
+			conf: &Config{
+				Interval: minInterval,
+				Avg10Threshold: minThreshold - 1,
+			},
+			wantErr: true,
+		},
+		{
+			name: "TC4 - No resource type specified",
+			conf: &Config{
+				Interval: minInterval,
+				Avg10Threshold: minThreshold,
+			},
+			wantErr: true,
+		},
+		{
+			name: "TC5 - Wrong resource type cpuacct - cpuacct is for psi subsystem, not for resource type",
+			conf: &Config{
+				Interval: minInterval,
+				Avg10Threshold: minThreshold,
+				Resource: []string{"cpu", "memory", "io", "cpuacct"},
+			},
+			wantErr: true,
+		},
+		{
+			name: "TC6 - Success case - trully end",
+			conf: &Config{
+				Interval: minInterval,
+				Avg10Threshold: minThreshold,
+				Resource: []string{"cpu", "memory", "io"},
+			},
+			wantErr: false,
+		},
+	}
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			if err := tc.conf.Validate(); (err != nil) != tc.wantErr {
+				t.Errorf("Config.Validate() error = %v, wantErr %v", err, tc.wantErr)
+			}
+		})
+	}
+}
+
+type FakeManager struct{}
+
+func (m *FakeManager) ListContainersWithOptions(options ...api.ListOption) map[string]*typedef.ContainerInfo {
+	return make(map[string]*typedef.ContainerInfo)
+}
+func (m *FakeManager) ListPodsWithOptions(options ...api.ListOption) map[string]*typedef.PodInfo {
+	return make(map[string]*typedef.PodInfo, 1)
+}
+
+// TestManagerRun creates a fake manager and runs it
+func TestManagerRun(t *testing.T) {
+	nm := NewManager("psi")
+	nm.conf.Interval = 1
+	nm.PreStart(&FakeManager{})
+	nm.SetConfig(func(configName string, d interface{}) error { return nil })
+	if !nm.IsRunner() {
+		t.Fatalf("FakeManager is not a runner!")
+		return
+	}
+
+	ctx, cancel := context.WithCancel(context.Background())
+	go nm.Run(ctx)
+	time.Sleep(time.Second)
+	cancel()
+}
diff --git a/pkg/services/service_test.go b/pkg/services/service_test.go
index a6e0298..537d0b3 100644
--- a/pkg/services/service_test.go
+++ b/pkg/services/service_test.go
@@ -36,6 +36,10 @@ var defaultFeature = []FeatureSpec{
 		Name: feature.QuotaTurboFeature,
 		Default: true,
 	},
+	{
+		Name: feature.PSIFeature,
+		Default: true,
+	},
 }

 func TestErrorInitServiceComponents(t *testing.T) {
--
2.32.1 (Apple Git-133)
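Editor's note: the PSI service above monitors pressure-stall information with an avg10 threshold. For context, a self-contained sketch of parsing the avg10 field from the kernel's documented PSI line format (e.g. /proc/pressure/cpu); this is not rubik's parser, just an illustration:

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parseAvg10 extracts avg10 from one PSI line, whose documented format is:
//   some avg10=0.00 avg60=0.00 avg300=0.00 total=0
func parseAvg10(line string) (float64, error) {
	for _, field := range strings.Fields(line) {
		if strings.HasPrefix(field, "avg10=") {
			return strconv.ParseFloat(strings.TrimPrefix(field, "avg10="), 64)
		}
	}
	return 0, fmt.Errorf("avg10 not found")
}

func main() {
	v, err := parseAvg10("some avg10=7.25 avg60=1.03 avg300=0.11 total=92837")
	fmt.Println(v, err) // 7.25 <nil>
}
```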
File diff suppressed because one or more lines are too long
@@ -1,83 +0,0 @@
From c74d87538bc27a20c24d27319c5d60970ab3ccab Mon Sep 17 00:00:00 2001
From: vegbir <yangjiaqi16@huawei.com>
Date: Wed, 14 Jun 2023 17:14:16 +0800
Subject: [PATCH 5/7] rubik: move fssr design document to design dir

Signed-off-by: vegbir <yangjiaqi16@huawei.com>
---
 docs/{ => design}/fssr.md | 18 ++++++++++++++----
 .../fssr/flowchart.png} | Bin
 .../fssr/sequence_diagram.png} | Bin
 3 files changed, 14 insertions(+), 4 deletions(-)
 rename docs/{ => design}/fssr.md (90%)
 rename docs/{png/rubik_fssr_2.png => images/fssr/flowchart.png} (100%)
 rename docs/{png/rubik_fssr_1.png => images/fssr/sequence_diagram.png} (100%)

diff --git a/docs/fssr.md b/docs/design/fssr.md
similarity index 90%
rename from docs/fssr.md
rename to docs/design/fssr.md
index 3fb36bd..184b364 100644
--- a/docs/fssr.md
+++ b/docs/design/fssr.md
@@ -1,20 +1,27 @@
+# [Requirement Design] fssr policy for asynchronous tiered memory reclaim
+
## Design Goals
+
In a colocation cluster, online and offline workloads are deployed on the same physical resource (node). The offline workloads are memory-intensive while the online workloads have peaks and troughs, so memory contention between offline and online workloads degrades the online services. This design aims to guarantee online QoS while making full use of memory.

## Overall Design
+
The relationships among the modules are as follows:
-
+

- The user deploys rubik, and rubik registers with k8s to listen for pod events.
- When an offline workload is deployed, k8s notifies rubik, and rubik configures memory.high for that offline pod.
- Meanwhile, rubik monitors the node's memory usage in real time and uses the fssr policy to configure memory.high for pods.

### Dependencies
+
The kernel must support memcg-level memory watermarks, i.e. provide `memory.high` and `memory.high_async_ratio`.

### Detailed Design
+
In the tiered-memory design, rubik adds an FSSR memory-handling module that obtains the host's (node's) total memory, reserved memory, and free memory, and sets memory.high for offline workloads according to the FSSR algorithm. The policy is as follows:
-
+
+
- When rubik starts, it computes the reserved memory: 10% of total memory by default, capped at 10G if 10% of total memory exceeds 10G
- Configure cgroup-level watermarks for offline containers. The kernel provides the `memory.high` and `memory.high_async_ratio` interfaces, which set the cgroup's soft limit and alert watermark respectively. When rubik starts, `memory.high` defaults to `total_memory` (total memory) `*` 80%
- Obtain the free memory free_memory
@@ -22,13 +29,16 @@
- When free_memory > 2`*`reserved_memory lasts for one minute, raise the offline memory.high by 1% of total memory (total_memory`*`1%) each time

Notes:
+
1. The memory.high of offline applications ranges over `[total_memory*30%, total_memory*80%]`

### Configuration
-```
+
+```json
"dynMemory": {
"policy": "fssr"
}
```
+
- dynMemory denotes dynamic memory
-- policy currently supports only fssr
\ No newline at end of file
+- policy currently supports only fssr
diff --git a/docs/png/rubik_fssr_2.png b/docs/images/fssr/flowchart.png
similarity index 100%
rename from docs/png/rubik_fssr_2.png
rename to docs/images/fssr/flowchart.png
diff --git a/docs/png/rubik_fssr_1.png b/docs/images/fssr/sequence_diagram.png
similarity index 100%
rename from docs/png/rubik_fssr_1.png
rename to docs/images/fssr/sequence_diagram.png
--
2.32.1 (Apple Git-133)
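Editor's note: a minimal sketch of the watermark arithmetic stated in the fssr design above (10% reservation capped at 10G; memory.high clamped to [30%, 80%] of total). Function names are mine; only the formulas come from the document:

```go
package main

import "fmt"

const gib = int64(1) << 30

// reservedMemory implements the rule from the doc: reserve 10% of total
// memory, capped at 10G.
func reservedMemory(total int64) int64 {
	r := total / 10
	if r > 10*gib {
		r = 10 * gib
	}
	return r
}

// clampMemHigh keeps memory.high inside [30%, 80%] of total memory,
// the range stated in the notes above.
func clampMemHigh(memHigh, total int64) int64 {
	if memHigh < total*3/10 {
		return total * 3 / 10
	}
	if memHigh > total*8/10 {
		return total * 8 / 10
	}
	return memHigh
}

func main() {
	total := 256 * gib
	fmt.Println(reservedMemory(total) == 10*gib)           // true: 25.6G capped to 10G
	fmt.Println(clampMemHigh(total, total) == total*8/10)  // true: clamped to 80%
}
```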
@@ -1,246 +0,0 @@
From 526bd12a3b77135ce5f112f3195f1e7d41d965d5 Mon Sep 17 00:00:00 2001
From: hanchao <hanchao63@huawei.com>
Date: Fri, 16 Jun 2023 13:10:42 +0800
Subject: [PATCH 6/7] rubik: fix that value of memory.high_async_ratio lost
 efficacy

---
 pkg/services/dynmemory/dynmemory.go | 22 +++++++-
 pkg/services/dynmemory/fssr.go | 78 +++++++++++++++++------------
 2 files changed, 67 insertions(+), 33 deletions(-)

diff --git a/pkg/services/dynmemory/dynmemory.go b/pkg/services/dynmemory/dynmemory.go
index da859dd..b73f476 100644
--- a/pkg/services/dynmemory/dynmemory.go
+++ b/pkg/services/dynmemory/dynmemory.go
@@ -6,6 +6,7 @@ import (
 	"time"

 	"isula.org/rubik/pkg/api"
+	"isula.org/rubik/pkg/core/typedef"
 	"isula.org/rubik/pkg/services/helper"
 	"k8s.io/apimachinery/pkg/util/wait"
 )
@@ -15,6 +16,7 @@ type DynMemoryAdapter interface {
 	preStart(api.Viewer) error
 	getInterval() int
 	dynamicAdjust()
+	setOfflinePod(path string) error
 }
 type dynMemoryConfig struct {
 	Policy string `json:"policy,omitempty"`
@@ -42,11 +44,11 @@ type DynMemory struct {
 }

 // PreStart is an interface for calling a collection of methods when the service is pre-started
-func (dynMem *DynMemory) PreStart(api api.Viewer) error {
+func (dynMem *DynMemory) PreStart(viewer api.Viewer) error {
 	if dynMem.dynMemoryAdapter == nil {
 		return nil
 	}
-	return dynMem.dynMemoryAdapter.preStart(api)
+	return dynMem.dynMemoryAdapter.preStart(viewer)
 }

 // SetConfig is an interface that invoke the ConfigHandler to obtain the corresponding configuration.
@@ -81,6 +83,22 @@ func (dynMem *DynMemory) IsRunner() bool {
 	return true
 }

+// AddPod to deal the event of adding a pod.
+func (dynMem *DynMemory) AddPod(podInfo *typedef.PodInfo) error {
+	if podInfo.Offline() {
+		return dynMem.dynMemoryAdapter.setOfflinePod(podInfo.Path)
+	}
+	return nil
+}
+
+// UpdatePod to deal the pod update event.
+func (dynMem *DynMemory) UpdatePod(old, new *typedef.PodInfo) error {
+	if new.Offline() {
+		return dynMem.dynMemoryAdapter.setOfflinePod(new.Path)
+	}
+	return nil
+}
+
 // newAdapter to create adapter of dyn memory.
 func newAdapter(policy string) DynMemoryAdapter {
 	switch policy {
diff --git a/pkg/services/dynmemory/fssr.go b/pkg/services/dynmemory/fssr.go
index 9fe4042..e23a4bc 100644
--- a/pkg/services/dynmemory/fssr.go
+++ b/pkg/services/dynmemory/fssr.go
@@ -9,6 +9,7 @@ import (

 	"isula.org/rubik/pkg/api"
 	"isula.org/rubik/pkg/common/constant"
+	"isula.org/rubik/pkg/common/log"
 	"isula.org/rubik/pkg/common/util"
 	"isula.org/rubik/pkg/core/typedef"
 	"isula.org/rubik/pkg/core/typedef/cgroup"
@@ -30,71 +31,76 @@ type fssrDynMemAdapter struct {
 	memTotal int64
 	memHigh int64
 	reservedMem int64
-	api api.Viewer
 	count int64
+	viewer api.Viewer
 }

-// initFssrDynMemAdapter function
+// initFssrDynMemAdapter initializes a new fssrDynMemAdapter struct.
 func initFssrDynMemAdapter() *fssrDynMemAdapter {
 	if total, err := getFieldMemory("MemTotal"); err == nil && total > 0 {
 		return &fssrDynMemAdapter{
 			memTotal: total,
 			memHigh: total * 8 / 10,
-			reservedMem: total * 8 / 10,
+			reservedMem: total * 1 / 10,
+			count: 0,
 		}
 	}
 	return nil
 }

-// preStart function
-func (f *fssrDynMemAdapter) preStart(api api.Viewer) error {
-	f.api = api
+// preStart initializes the fssrDynMemAdapter with the provided viewer and
+// deals with any existing pods.
+func (f *fssrDynMemAdapter) preStart(viewer api.Viewer) error {
+	f.viewer = viewer
 	return f.dealExistedPods()
 }

-// getInterval function
+// getInterval returns the fssrInterval value.
 func (f *fssrDynMemAdapter) getInterval() int {
 	return fssrInterval
 }

-// dynadjust function
+// dynamicAdjust adjusts the memory allocation of the fssrDynMemAdapter by
+// increasing or decreasing the amount of memory reserved for offline pods
+// based on the current amount of free memory available on the system.
 func (f *fssrDynMemAdapter) dynamicAdjust() {
 	var freeMem int64
 	var err error
 	if freeMem, err = getFieldMemory("MemFree"); err != nil {
 		return
 	}
+
+	var memHigh int64 = 0
 	if freeMem > 2*f.reservedMem {
 		if f.count < fssrIntervalCount {
 			f.count++
 			return
 		}
-		memHigh := f.memHigh + f.memTotal/100
+		// no risk of overflow
+		memHigh = f.memHigh + f.memTotal/100
 		if memHigh > f.memTotal*8/10 {
 			memHigh = f.memTotal * 8 / 10
 		}
-		if memHigh != f.memHigh {
-			f.memHigh = memHigh
-			f.adjustOfflinePodHighMemory()
-		}
 	} else if freeMem < f.reservedMem {
-		memHigh := f.memHigh - f.memTotal/10
+		memHigh = f.memHigh - f.memTotal/10
 		if memHigh < 0 {
 			return
 		}
 		if memHigh < f.memTotal*3/10 {
 			memHigh = f.memTotal * 3 / 10
 		}
-		if memHigh != f.memHigh {
-			f.memHigh = memHigh
-			f.adjustOfflinePodHighMemory()
-		}
 	}
+	if memHigh != f.memHigh {
+		f.memHigh = memHigh
+		f.adjustOfflinePodHighMemory()
+	}
+
 	f.count = 0
 }

+// adjustOfflinePodHighMemory adjusts the memory.high of offline pods.
 func (f *fssrDynMemAdapter) adjustOfflinePodHighMemory() error {
-	pods := listOfflinePods(f.api)
+	pods := listOfflinePods(f.viewer)
 	for _, podInfo := range pods {
 		if err := setOfflinePodHighMemory(podInfo.Path, f.memHigh); err != nil {
 			return err
@@ -103,20 +109,18 @@ func (f *fssrDynMemAdapter) adjustOfflinePodHighMemory() error {
 	return nil
 }

-// dealExistedPods function
+// dealExistedPods handles offline pods by setting their memory.high and memory.high_async_ratio
 func (f *fssrDynMemAdapter) dealExistedPods() error {
-	pods := listOfflinePods(f.api)
+	pods := listOfflinePods(f.viewer)
 	for _, podInfo := range pods {
-		if err := setOfflinePodHighMemory(podInfo.Path, f.memHigh); err != nil {
-			return err
-		}
-		if err := setOfflinePodHighAsyncRatio(podInfo.Path, highRatio); err != nil {
-			return err
+		if err := f.setOfflinePod(podInfo.Path); err != nil {
+			log.Errorf("set fssr of offline pod[%v] error:%v", podInfo.UID, err)
 		}
 	}
 	return nil
 }

+// listOfflinePods returns a map of offline PodInfo objects.
 func listOfflinePods(viewer api.Viewer) map[string]*typedef.PodInfo {
 	offlineValue := "true"
 	return viewer.ListPodsWithOptions(func(pi *typedef.PodInfo) bool {
@@ -124,23 +128,35 @@ func listOfflinePods(viewer api.Viewer) map[string]*typedef.PodInfo {
 	})
 }

-func setOfflinePodHighMemory(podPath string, high int64) error {
-	if err := cgroup.WriteCgroupFile(strconv.FormatUint(uint64(high), scale), memcgRootDir,
+// setOfflinePod sets the offline pod for the given path.
+func (f *fssrDynMemAdapter) setOfflinePod(path string) error {
+	if err := setOfflinePodHighAsyncRatio(path, highRatio); err != nil {
+		return err
+	}
+	return setOfflinePodHighMemory(path, f.memHigh)
+}
+
+// setOfflinePodHighMemory sets the high memory limit for the specified pod in the
+// cgroup memory
+func setOfflinePodHighMemory(podPath string, memHigh int64) error {
+	if err := cgroup.WriteCgroupFile(strconv.FormatUint(uint64(memHigh), scale), memcgRootDir,
 		podPath, highMemFile); err != nil {
 		return err
 	}
 	return nil
 }

-func setOfflinePodHighAsyncRatio(podPath string, ratio uint64) error {
-	if err := cgroup.WriteCgroupFile(strconv.FormatUint(ratio, scale), memcgRootDir,
+// setOfflinePodHighAsyncRatio sets the high memory async ratio for a pod in an offline state.
+func setOfflinePodHighAsyncRatio(podPath string, ratio uint) error {
+	if err := cgroup.WriteCgroupFile(strconv.FormatUint(uint64(ratio), scale), memcgRootDir,
 		podPath, highMemAsyncRatioFile); err != nil {
 		return err
 	}
 	return nil
 }

-// getFieldMemory function
+// getFieldMemory retrieves the amount of memory used by a certain field in the
+// memory information file.
 func getFieldMemory(field string) (int64, error) {
 	if !util.PathExist(memInfoFile) {
 		return 0, fmt.Errorf("%v: no such file or diretory", memInfoFile)
--
2.32.1 (Apple Git-133)
@@ -1,127 +0,0 @@
From 6b9d862857a1b302b26d8d51e7df5fed3062ba94 Mon Sep 17 00:00:00 2001
From: vegbir <yangjiaqi16@huawei.com>
Date: Mon, 19 Jun 2023 11:47:53 +0800
Subject: [PATCH 7/7] bugfix: fix typos & calling order of waitgroup

Signed-off-by: vegbir <yangjiaqi16@huawei.com>
---
 pkg/core/trigger/base.go | 4 ++--
 pkg/core/trigger/expulsion.go | 2 +-
 pkg/core/trigger/resourceanalysis.go | 6 +++---
 pkg/rubik/servicemanager.go | 6 +++---
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/pkg/core/trigger/base.go b/pkg/core/trigger/base.go
index 7f1fbe9..c212f66 100644
--- a/pkg/core/trigger/base.go
+++ b/pkg/core/trigger/base.go
@@ -70,8 +70,8 @@ type TreeTrigger struct {
 	subTriggers []Trigger
 }

-// withTreeTirgger returns a BaseMetric object
-func withTreeTirgger(name string, exec Executor) *TreeTrigger {
+// withTreeTrigger returns a BaseMetric object
+func withTreeTrigger(name string, exec Executor) *TreeTrigger {
 	return &TreeTrigger{
 		name: name,
 		exec: exec,
diff --git a/pkg/core/trigger/expulsion.go b/pkg/core/trigger/expulsion.go
index 87dd484..e438d3d 100644
--- a/pkg/core/trigger/expulsion.go
+++ b/pkg/core/trigger/expulsion.go
@@ -41,7 +41,7 @@ var expulsionCreator = func() Trigger {
 			appendUsedExecutors(ExpulsionAnno, expulsionExec)
 		}
 	}
-	return withTreeTirgger(ExpulsionAnno, expulsionExec)
+	return withTreeTrigger(ExpulsionAnno, expulsionExec)
 }

 // Expulsion is the trigger to evict pods
diff --git a/pkg/core/trigger/resourceanalysis.go b/pkg/core/trigger/resourceanalysis.go
index a3d99e5..7e7413e 100644
--- a/pkg/core/trigger/resourceanalysis.go
+++ b/pkg/core/trigger/resourceanalysis.go
@@ -49,7 +49,7 @@ var analyzerCreator = func() Trigger {
 			appendUsedExecutors(ResourceAnalysisAnno, resourceAnalysisExec)
 		}
 	}
-	return withTreeTirgger(ResourceAnalysisAnno, resourceAnalysisExec)
+	return withTreeTrigger(ResourceAnalysisAnno, resourceAnalysisExec)
 }

 // rreqOpt is the option to get information from cadvisor
@@ -161,7 +161,7 @@ func (a *Analyzer) maxCPUUtil(pods map[string]*typedef.PodInfo) *typedef.PodInfo
 		}
 	}
 	if chosen != nil {
-		log.Infof("find the max cpu util pod \"%v\": %v", chosen.Name, maxUtil)
+		log.Infof("find the pod(%v) with the highest cpu utilization(%v)", chosen.Name, maxUtil)
 	}
 	return chosen
 }
@@ -185,7 +185,7 @@ func (a *Analyzer) maxMemoryUtil(pods map[string]*typedef.PodInfo) *typedef.PodI
 		}
 	}
 	if chosen != nil {
-		log.Infof("find the max cpu util pod \"%v\": %v", chosen.Name, maxUtil)
+		log.Infof("find the pod(%v) with the highest memory utilization(%v)", chosen.Name, maxUtil)
 	}
 	return chosen
 }
diff --git a/pkg/rubik/servicemanager.go b/pkg/rubik/servicemanager.go
index 3e162b6..c3b252a 100644
--- a/pkg/rubik/servicemanager.go
+++ b/pkg/rubik/servicemanager.go
@@ -218,7 +218,6 @@ func (manager *ServiceManager) addFunc(event typedef.Event) {

 	const retryCount = 5
 	addOnce := func(s services.Service, podInfo *typedef.PodInfo, wg *sync.WaitGroup) {
-		wg.Add(1)
 		for i := 0; i < retryCount; i++ {
 			if err := s.AddPod(podInfo); err != nil {
 				log.Errorf("service %s add func failed: %v", s.ID(), err)
@@ -231,6 +230,7 @@ func (manager *ServiceManager) addFunc(event typedef.Event) {
 	manager.RLock()
 	var wg sync.WaitGroup
 	for _, s := range manager.RunningServices {
+		wg.Add(1)
 		go addOnce(s, podInfo.DeepCopy(), &wg)
 	}
 	wg.Wait()
@@ -250,7 +250,6 @@ func (manager *ServiceManager) updateFunc(event typedef.Event) {
 		return
 	}
 	runOnce := func(s services.Service, old, new *typedef.PodInfo, wg *sync.WaitGroup) {
-		wg.Add(1)
 		log.Debugf("update Func with service: %s", s.ID())
 		if err := s.UpdatePod(old, new); err != nil {
 			log.Errorf("service %s update func failed: %v", s.ID(), err)
@@ -260,6 +259,7 @@ func (manager *ServiceManager) updateFunc(event typedef.Event) {
 	manager.RLock()
 	var wg sync.WaitGroup
 	for _, s := range manager.RunningServices {
+		wg.Add(1)
 		go runOnce(s, podInfos[0], podInfos[1], &wg)
 	}
 	wg.Wait()
@@ -275,7 +275,6 @@ func (manager *ServiceManager) deleteFunc(event typedef.Event) {
 	}

 	deleteOnce := func(s services.Service, podInfo *typedef.PodInfo, wg *sync.WaitGroup) {
-		wg.Add(1)
 		if err := s.DeletePod(podInfo); err != nil {
 			log.Errorf("service %s delete func failed: %v", s.ID(), err)
 		}
@@ -284,6 +283,7 @@ func (manager *ServiceManager) deleteFunc(event typedef.Event) {
 	manager.RLock()
 	var wg sync.WaitGroup
 	for _, s := range manager.RunningServices {
+		wg.Add(1)
 		go deleteOnce(s, podInfo.DeepCopy(), &wg)
 	}
 	wg.Wait()
--
2.32.1 (Apple Git-133)
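Editor's note: the waitgroup fix above illustrates a standard Go rule — sync.WaitGroup.Add must run in the spawning goroutine before `go`, or Wait can observe a zero counter and return before any work starts. A self-contained sketch of the corrected pattern:

```go
package main

import (
	"fmt"
	"sync"
)

func main() {
	var wg sync.WaitGroup
	items := []string{"a", "b", "c"}
	for _, it := range items {
		// Add runs before the goroutine starts; calling it inside the
		// goroutine (as the buggy code did) races with wg.Wait() below.
		wg.Add(1)
		go func(s string) {
			defer wg.Done()
			fmt.Println("processed", s)
		}(it)
	}
	wg.Wait()
}
```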
@@ -1,135 +0,0 @@
From e30c428721ac2fd0da5152a28de04dbbf9c9d1ea Mon Sep 17 00:00:00 2001
From: jingxiaolu <lujingxiao@huawei.com>
Date: Sun, 11 Jun 2023 22:34:08 +0800
Subject: [PATCH 1/2] rubik: test coverage improvement for pkg/config

1. improve test coverage for pkg/config from 57.4% to 80.3%
2. change cpuLimit to 1 in TestStatusStore_AddCgroup-TC5 for nano vm

Signed-off-by: jingxiaolu <lujingxiao@huawei.com>
---
 pkg/config/config_test.go | 69 ++++++++++++++++++++--
 pkg/lib/cpu/quotaturbo/statusstore_test.go | 2 +-
 2 files changed, 66 insertions(+), 5 deletions(-)

diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go
index 03ff4ca..8766e04 100644
--- a/pkg/config/config_test.go
+++ b/pkg/config/config_test.go
@@ -27,7 +27,8 @@ import (
 	"isula.org/rubik/pkg/common/util"
 )

-var rubikConfig string = `
+func TestNewConfig(t *testing.T) {
+	var rubikConfig string = `
 {
 	"agent": {
 		"logDriver": "stdio",
@@ -85,27 +86,87 @@ var rubikConfig string = `
 	}
 }
 `
+	if !util.PathExist(constant.TmpTestDir) {
+		if err := os.Mkdir(constant.TmpTestDir, constant.DefaultDirMode); err != nil {
+			assert.NoError(t, err)
+		}
+	}

-func TestNewConfig(t *testing.T) {
+	defer os.RemoveAll(constant.TmpTestDir)
+
+	tmpConfigFile := filepath.Join(constant.TmpTestDir, "config.json")
+	defer os.Remove(tmpConfigFile)
+	if err := ioutil.WriteFile(tmpConfigFile, []byte(rubikConfig), constant.DefaultFileMode); err != nil {
+		assert.NoError(t, err)
+	}
+
+	c := NewConfig(JSON)
+	if err := c.LoadConfig(tmpConfigFile); err != nil {
+		assert.NoError(t, err)
+	}
+	fmt.Printf("config: %v", c)
+}
+
+func TestNewConfigNoConfig(t *testing.T) {
+	c := &Config{}
+	if err := c.LoadConfig(""); err == nil {
+		t.Fatalf("Config file exists")
+	}
+}
+
+func TestNewConfigDamagedConfig(t *testing.T) {
+	var rubikConfig string = `{`
 	if !util.PathExist(constant.TmpTestDir) {
 		if err := os.Mkdir(constant.TmpTestDir, constant.DefaultDirMode); err != nil {
 			assert.NoError(t, err)
 		}
 	}
+	defer os.RemoveAll(constant.TmpTestDir)
+
+	tmpConfigFile := filepath.Join(constant.TmpTestDir, "config.json")
+	defer os.Remove(tmpConfigFile)
+	if err := ioutil.WriteFile(tmpConfigFile, []byte(rubikConfig), constant.DefaultFileMode); err != nil {
+		assert.NoError(t, err)
+	}
+
+	c := NewConfig(JSON)
+	if err := c.LoadConfig(tmpConfigFile); err == nil {
+		t.Fatalf("Damaged config file should not be loaded.")
+	}
+}

+func TestNewConfigNoAgentConfig(t *testing.T) {
+	var rubikConfig string = `{}`
+	if !util.PathExist(constant.TmpTestDir) {
+		if err := os.Mkdir(constant.TmpTestDir, constant.DefaultDirMode); err != nil {
+			assert.NoError(t, err)
+		}
+	}
 	defer os.RemoveAll(constant.TmpTestDir)

 	tmpConfigFile := filepath.Join(constant.TmpTestDir, "config.json")
 	defer os.Remove(tmpConfigFile)
 	if err := ioutil.WriteFile(tmpConfigFile, []byte(rubikConfig), constant.DefaultFileMode); err != nil {
 		assert.NoError(t, err)
-		return
 	}

 	c := NewConfig(JSON)
 	if err := c.LoadConfig(tmpConfigFile); err != nil {
 		assert.NoError(t, err)
-		return
 	}
 	fmt.Printf("config: %v", c)
 }
+
+func TestUnwrapServiceConfig(t *testing.T) {
+	c := &Config{}
+	c.Fields = make(map[string]interface{})
+	c.Fields["agent"] = nil
+	c.Fields["config"] = nil
+	sc := c.UnwrapServiceConfig()
+	if _, exist := sc["agent"]; exist {
+		t.Fatalf("agent is exists")
+	}
+	if _, exist := sc["config"]; !exist {
+		t.Fatalf("config is not exists")
+	}
+}
diff --git a/pkg/lib/cpu/quotaturbo/statusstore_test.go b/pkg/lib/cpu/quotaturbo/statusstore_test.go
index 68c01c5..ce1684d 100644
--- a/pkg/lib/cpu/quotaturbo/statusstore_test.go
+++ b/pkg/lib/cpu/quotaturbo/statusstore_test.go
@@ -354,7 +354,7 @@ func TestStatusStore_AddCgroup(t *testing.T) {
 			name: "TC5-add successfully",
 			args: args{
 				cgroupPath: contPath,
-				cpuLimit: 2,
+				cpuLimit: 1,
 			},
 			fields: fields{
 				Config: &Config{
--
2.30.0
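Editor's note: TestNewConfigDamagedConfig above expects LoadConfig to reject a truncated document. A tiny standalone illustration of why — truncated JSON such as `{` always fails to parse:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// A truncated document like `{` must fail to parse, which is what the
// damaged-config test asserts through LoadConfig.
func main() {
	var v map[string]interface{}
	err := json.Unmarshal([]byte(`{`), &v)
	fmt.Println(err != nil) // true: unexpected end of JSON input
}
```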
@ -1,65 +0,0 @@
|
||||
From 72dbcc6acf989f7c3423b24091c0b9875d1f4872 Mon Sep 17 00:00:00 2001
|
||||
From: hanchao <hanchao63@huawei.com>
|
||||
Date: Mon, 19 Jun 2023 20:29:41 +0800
|
||||
Subject: [PATCH 2/2] rubik: optimize `dynamicAdjust` to be clear and add log
|
||||
for error
|
||||
|
||||
---
|
||||
pkg/services/dynmemory/fssr.go | 18 ++++++++++++++----
|
||||
1 file changed, 14 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/pkg/services/dynmemory/fssr.go b/pkg/services/dynmemory/fssr.go
|
||||
index e23a4bc..2c81ccf 100644
|
||||
--- a/pkg/services/dynmemory/fssr.go
|
||||
+++ b/pkg/services/dynmemory/fssr.go
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
+ "math"
|
||||
"os"
|
||||
"strconv"
|
||||
|
||||
@@ -76,26 +77,35 @@ func (f *fssrDynMemAdapter) dynamicAdjust() {
|
||||
f.count++
|
||||
return
|
||||
}
|
||||
- // no risk of overflow
|
||||
+ // check int64 overflow
|
||||
+ if f.memHigh > math.MaxInt64-f.memTotal/100 {
|
||||
+ log.Errorf("int64 overflow")
|
||||
+ return
|
||||
+ }
|
||||
memHigh = f.memHigh + f.memTotal/100
|
||||
if memHigh > f.memTotal*8/10 {
|
||||
memHigh = f.memTotal * 8 / 10
|
||||
}
|
||||
+ f.adjustMemoryHigh(memHigh)
|
||||
} else if freeMem < f.reservedMem {
|
||||
memHigh = f.memHigh - f.memTotal/10
|
||||
- if memHigh < 0 {
|
||||
+ if memHigh <= 0 {
|
||||
+ log.Errorf("memHigh is equal to or less than 0")
|
||||
return
|
||||
}
|
||||
if memHigh < f.memTotal*3/10 {
|
||||
memHigh = f.memTotal * 3 / 10
|
||||
}
|
||||
+ f.adjustMemoryHigh(memHigh)
|
||||
}
|
||||
+ f.count = 0
|
||||
+}
|
||||
+
|
||||
+func (f *fssrDynMemAdapter) adjustMemoryHigh(memHigh int64) {
|
||||
if memHigh != f.memHigh {
|
||||
f.memHigh = memHigh
|
||||
f.adjustOfflinePodHighMemory()
|
||||
}
|
||||
-
|
||||
- f.count = 0
|
||||
}
|
||||
|
||||
// adjustOfflinePodHighMemory adjusts the memory.high of offline pods.
|
||||
--
|
||||
2.30.0
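
The reworked dynamicAdjust grows memory.high by 1% of total memory when free memory is ample, shrinks it by 10% under pressure, clamps the result to between 30% and 80% of total memory, and now guards the increment against int64 overflow. A condensed sketch of that arithmetic as a pure function; rubik's actual version operates on the adapter's fields and also resets its sampling counter:

package main

import (
	"fmt"
	"math"
)

// nextMemHigh mirrors the adjustment rules from dynamicAdjust: grow by
// memTotal/100 when free memory is ample, shrink by memTotal/10 under
// pressure, and keep the result within [30%, 80%] of total memory.
// It returns the current value unchanged when the increment would
// overflow int64 or the decrement would reach zero, as in the patch.
func nextMemHigh(memHigh, memTotal int64, underPressure bool) int64 {
	if underPressure {
		next := memHigh - memTotal/10
		if next <= 0 {
			return memHigh // refuse to drop memory.high to zero or below
		}
		if next < memTotal*3/10 {
			next = memTotal * 3 / 10 // 30% floor
		}
		return next
	}
	if memHigh > math.MaxInt64-memTotal/100 {
		return memHigh // int64 overflow guard
	}
	next := memHigh + memTotal/100
	if next > memTotal*8/10 {
		next = memTotal * 8 / 10 // 80% ceiling
	}
	return next
}

func main() {
	const total = int64(16 << 30)                      // 16 GiB
	fmt.Println(nextMemHigh(total*5/10, total, false)) // grows by 1% of total
	fmt.Println(nextMemHigh(total*3/10, total, true))  // clamped at the 30% floor
}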

@ -1,76 +0,0 @@
From d704f38600a31138107460b7eba4a68a0b8362ea Mon Sep 17 00:00:00 2001
From: vegbir <yangjiaqi16@huawei.com>
Date: Tue, 26 Sep 2023 17:05:06 +0800
Subject: [PATCH] support isulad container engine

Signed-off-by: vegbir <yangjiaqi16@huawei.com>
---
pkg/core/typedef/containerinfo.go | 5 +++++
pkg/core/typedef/rawpod.go | 8 ++++++--
2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/pkg/core/typedef/containerinfo.go b/pkg/core/typedef/containerinfo.go
index 39cb2cc..d810e5b 100644
--- a/pkg/core/typedef/containerinfo.go
+++ b/pkg/core/typedef/containerinfo.go
@@ -15,6 +15,7 @@
package typedef

import (
+ "fmt"
"path/filepath"
"strings"
"sync"
@@ -32,12 +33,15 @@ const (
DOCKER
// CONTAINERD means containerd container engine
CONTAINERD
+ // ISULAD means isulad container engine
+ ISULAD
)

var (
supportEnginesPrefixMap = map[ContainerEngineType]string{
DOCKER: "docker://",
CONTAINERD: "containerd://",
+ ISULAD: "iSulad://",
}
currentContainerEngines = UNDEFINED
setContainerEnginesOnce sync.Once
@@ -85,6 +89,7 @@ func fixContainerEngine(containerID string) {
for engine, prefix := range supportEnginesPrefixMap {
if strings.HasPrefix(containerID, prefix) {
currentContainerEngines = engine
+ fmt.Printf("The container engine is %v\n", strings.Split(currentContainerEngines.Prefix(), ":")[0])
return
}
}
diff --git a/pkg/core/typedef/rawpod.go b/pkg/core/typedef/rawpod.go
index 59dfb59..138c580 100644
--- a/pkg/core/typedef/rawpod.go
+++ b/pkg/core/typedef/rawpod.go
@@ -149,7 +149,11 @@ func (pod *RawPod) ExtractContainerInfos() map[string]*ContainerInfo {
podCgroupPath := pod.CgroupPath()
for _, rawContainer := range nameRawContainersMap {
id, err := rawContainer.GetRealContainerID()
- if id == "" || err != nil {
+ if err != nil {
+ fmt.Printf("failed to parse container ID: %v\n", err)
+ continue
+ }
+ if id == "" {
continue
}
idContainersMap[id] = NewContainerInfo(id, podCgroupPath, rawContainer)
@@ -169,7 +173,7 @@ func (cont *RawContainer) GetRealContainerID() (string, error) {
setContainerEnginesOnce.Do(func() { fixContainerEngine(cont.status.ContainerID) })

if !currentContainerEngines.Support(cont) {
- return "", fmt.Errorf("fatal error : unsupported container engine")
+ return "", fmt.Errorf("unsupported container engine: %v", cont.status.ContainerID)
}

cid := cont.status.ContainerID[len(currentContainerEngines.Prefix()):]
--
2.25.1
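
The isulad support follows the existing pattern: the engine is recognized by the scheme prefix of the container ID reported in the pod status, and the bare container ID is recovered by stripping that prefix. A standalone sketch of the lookup, simplified from the patch (rubik additionally caches the detected engine with sync.Once):

package main

import (
	"fmt"
	"strings"
)

// enginePrefixes mirrors supportEnginesPrefixMap from the patch.
var enginePrefixes = map[string]string{
	"docker":     "docker://",
	"containerd": "containerd://",
	"isulad":     "iSulad://",
}

// realContainerID detects the engine from the prefix of the container ID
// reported by the kubelet and returns the bare container ID, or an error
// for an unknown engine, as in GetRealContainerID.
func realContainerID(statusID string) (engine, id string, err error) {
	for name, prefix := range enginePrefixes {
		if strings.HasPrefix(statusID, prefix) {
			return name, statusID[len(prefix):], nil
		}
	}
	return "", "", fmt.Errorf("unsupported container engine: %v", statusID)
}

func main() {
	engine, id, err := realContainerID("iSulad://0123abcd")
	fmt.Println(engine, id, err) // isulad 0123abcd <nil>
}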

@ -1,197 +0,0 @@
From f7cad7376bd823440df1f2f76c1d13cdfa8d4cbe Mon Sep 17 00:00:00 2001
From: suoxiaocong <suoxiaocong@kylinos.cn>
Date: Mon, 22 Apr 2024 15:52:03 +0800
Subject: [PATCH] support systemd cgroup driver

---
pkg/common/constant/constant.go | 7 +++++++
pkg/config/config.go | 12 +++++++-----
pkg/core/typedef/cgroup/common.go | 11 +++++++++++
pkg/core/typedef/containerinfo.go | 16 +++++++++++++++-
pkg/core/typedef/rawpod.go | 30 ++++++++++++++++++++++++++++--
pkg/rubik/rubik.go | 3 +++
6 files changed, 71 insertions(+), 8 deletions(-)

diff --git a/pkg/common/constant/constant.go b/pkg/common/constant/constant.go
index 6a1f69d..cf780b8 100644
--- a/pkg/common/constant/constant.go
+++ b/pkg/common/constant/constant.go
@@ -113,3 +113,10 @@ const (
// PSIIOCgroupFileName is name of cgroup file used for detecting io psi
PSIIOCgroupFileName = "io.pressure"
)
+
+const (
+ // CgroupDriverSystemd is global config for cgroupfs driver choice: systemd driver
+ CgroupDriverSystemd = "systemd"
+ // CgroupDriverCgroupfs is global config for cgroupfs driver choice: cgroupfs driver
+ CgroupDriverCgroupfs = "cgroupfs"
+)
diff --git a/pkg/config/config.go b/pkg/config/config.go
index e0caef3..b8d31a5 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -44,6 +44,7 @@ type AgentConfig struct {
LogDir string `json:"logDir,omitempty"`
CgroupRoot string `json:"cgroupRoot,omitempty"`
EnabledFeatures []string `json:"enabledFeatures,omitempty"`
+ CgroupDriver string `json:"cgroupDriver,omitempty"`
}

// NewConfig returns a config object pointer
@@ -51,11 +52,12 @@ func NewConfig(pType parserType) *Config {
c := &Config{
ConfigParser: defaultParserFactory.getParser(pType),
Agent: &AgentConfig{
- LogDriver: constant.LogDriverStdio,
- LogSize: constant.DefaultLogSize,
- LogLevel: constant.DefaultLogLevel,
- LogDir: constant.DefaultLogDir,
- CgroupRoot: constant.DefaultCgroupRoot,
+ LogDriver: constant.LogDriverStdio,
+ LogSize: constant.DefaultLogSize,
+ LogLevel: constant.DefaultLogLevel,
+ LogDir: constant.DefaultLogDir,
+ CgroupRoot: constant.DefaultCgroupRoot,
+ CgroupDriver: constant.CgroupDriverCgroupfs,
},
}
return c
diff --git a/pkg/core/typedef/cgroup/common.go b/pkg/core/typedef/cgroup/common.go
index 11002ab..668f951 100644
--- a/pkg/core/typedef/cgroup/common.go
+++ b/pkg/core/typedef/cgroup/common.go
@@ -25,6 +25,17 @@ import (
)

var rootDir = constant.DefaultCgroupRoot
+var cgroupDriver = constant.CgroupDriverCgroupfs
+
+// SetCgroupDriver is the setter of global cgroup driver
+func SetCgroupDriver(driver string) {
+ cgroupDriver = driver
+}
+
+// GetCgroupDriver is the getter of global cgroup driver
+func GetCgroupDriver() string {
+ return cgroupDriver
+}

// AbsoluteCgroupPath returns the absolute path of the cgroup
func AbsoluteCgroupPath(elem ...string) string {
diff --git a/pkg/core/typedef/containerinfo.go b/pkg/core/typedef/containerinfo.go
index d810e5b..f751b25 100644
--- a/pkg/core/typedef/containerinfo.go
+++ b/pkg/core/typedef/containerinfo.go
@@ -20,6 +20,7 @@ import (
"strings"
"sync"

+ "isula.org/rubik/pkg/common/constant"
"isula.org/rubik/pkg/core/typedef/cgroup"
)

@@ -45,6 +46,11 @@ var (
}
currentContainerEngines = UNDEFINED
setContainerEnginesOnce sync.Once
+ containerEngineScopes = map[ContainerEngineType]string{
+ DOCKER: "docker",
+ CONTAINERD: "cri-containerd",
+ ISULAD: "isulad",
+ }
)

// Support returns true when the container uses the container engine
@@ -76,10 +82,18 @@ type ContainerInfo struct {
// NewContainerInfo creates a ContainerInfo instance
func NewContainerInfo(id, podCgroupPath string, rawContainer *RawContainer) *ContainerInfo {
requests, limits := rawContainer.GetResourceMaps()
+ var path string
+ if cgroup.GetCgroupDriver() == constant.CgroupDriverSystemd {
+ scopeName := containerEngineScopes[currentContainerEngines]
+ path = filepath.Join(podCgroupPath, scopeName+"-"+id+".scope")
+ } else {
+ path = filepath.Join(podCgroupPath, id)
+ }
+
return &ContainerInfo{
Name: rawContainer.status.Name,
ID: id,
- Hierarchy: cgroup.Hierarchy{Path: filepath.Join(podCgroupPath, id)},
+ Hierarchy: cgroup.Hierarchy{Path: path},
RequestResources: requests,
LimitResources: limits,
}
diff --git a/pkg/core/typedef/rawpod.go b/pkg/core/typedef/rawpod.go
index 138c580..895e9d4 100644
--- a/pkg/core/typedef/rawpod.go
+++ b/pkg/core/typedef/rawpod.go
@@ -23,6 +23,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"

"isula.org/rubik/pkg/common/constant"
+ "isula.org/rubik/pkg/core/typedef/cgroup"
)

const (
@@ -103,7 +104,7 @@ func (pod *RawPod) CgroupPath() string {
return ""
}
/*
- example:
+ for cgroupfs cgroup driver
1. Burstable: pod requests are less than the value of limits and not 0;
kubepods/burstable/pod34152897-dbaf-11ea-8cb9-0653660051c3
2. BestEffort: pod requests and limits are both 0;
@@ -111,7 +112,32 @@ func (pod *RawPod) CgroupPath() string {
3. Guaranteed: pod requests are equal to the value set by limits;
kubepods/pod34152897-dbaf-11ea-8cb9-0653660051c3
*/
- return filepath.Join(constant.KubepodsCgroup, qosClassPath, constant.PodCgroupNamePrefix+id)
+ /*
+ for systemd cgroup driver
+ 1. burstable:
+ kubepods.slice/kubepods-burstable.slice/kubepods-burstable-podb895995a_e7e5_413e_9bc1_3c3895b3f233.slice
+ 2. besteffort
+ kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-podb895995a_e7e5_413e_9bc1_3c3895b3f233.slice
+ 3. guaranteed
+ kubepods.slice/kubepods-podb895995a_e7e5_413e_9bc1_3c3895b3f233.slice/
+ */
+
+ if cgroup.GetCgroupDriver() == constant.CgroupDriverSystemd {
+ if qosClassPath == "" {
+ return filepath.Join(
+ constant.KubepodsCgroup+".slice",
+ constant.KubepodsCgroup+"-"+constant.PodCgroupNamePrefix+strings.Replace(id, "-", "_", -1)+".slice",
+ )
+ }
+ return filepath.Join(
+ constant.KubepodsCgroup+".slice",
+ constant.KubepodsCgroup+"-"+qosClassPath+".slice",
+ constant.KubepodsCgroup+"-"+qosClassPath+"-"+constant.PodCgroupNamePrefix+strings.Replace(id, "-", "_", -1)+".slice",
+ )
+ } else {
+ return filepath.Join(constant.KubepodsCgroup, qosClassPath, constant.PodCgroupNamePrefix+id)
+ }
+
}

// ListRawContainers returns all RawContainers in the RawPod
diff --git a/pkg/rubik/rubik.go b/pkg/rubik/rubik.go
index f55e834..c4fc583 100644
--- a/pkg/rubik/rubik.go
+++ b/pkg/rubik/rubik.go
@@ -126,6 +126,9 @@ func runAgent(ctx context.Context) error {

// 3. enable cgroup system
cgroup.InitMountDir(c.Agent.CgroupRoot)
+ if c.Agent.CgroupDriver != "" {
+ cgroup.SetCgroupDriver(c.Agent.CgroupDriver)
+ }

// 4. init service components
services.InitServiceComponents(defaultRubikFeature)
--
2.25.1
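
Under the systemd driver the pod path becomes a chain of .slice units with the dashes in the pod UID replaced by underscores, and each container gets an <engine>-<id>.scope segment; under cgroupfs the plain kubepods/<qos>/pod<uid>/<id> layout is kept. A standalone sketch of both constructions, with the constant values ("kubepods", "pod") inferred from the example paths in the patch:

package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

// podCgroupPath builds the pod-level cgroup path for either driver,
// following the examples in rawpod.go. qosClass is "" for Guaranteed
// pods, "burstable" or "besteffort" otherwise.
func podCgroupPath(driver, qosClass, uid string) string {
	if driver == "systemd" {
		id := strings.Replace(uid, "-", "_", -1)
		if qosClass == "" {
			return filepath.Join("kubepods.slice", "kubepods-pod"+id+".slice")
		}
		return filepath.Join(
			"kubepods.slice",
			"kubepods-"+qosClass+".slice",
			"kubepods-"+qosClass+"-pod"+id+".slice",
		)
	}
	return filepath.Join("kubepods", qosClass, "pod"+uid)
}

// containerCgroupPath appends the container segment: a bare ID for
// cgroupfs, or an <engine>-<id>.scope unit for systemd, where the
// engine scope name is docker, cri-containerd, or isulad.
func containerCgroupPath(driver, podPath, engineScope, id string) string {
	if driver == "systemd" {
		return filepath.Join(podPath, engineScope+"-"+id+".scope")
	}
	return filepath.Join(podPath, id)
}

func main() {
	uid := "b895995a-e7e5-413e-9bc1-3c3895b3f233"
	pod := podCgroupPath("systemd", "burstable", uid)
	fmt.Println(pod) // matches the burstable example path in the patch
	fmt.Println(containerCgroupPath("systemd", pod, "cri-containerd", "0123abcd"))
}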

rubik.spec
@ -1,6 +1,6 @@
Name: rubik
Version: 2.0.0
Release: 4
Version: 2.0.1
Release: 2
Summary: Hybrid Deployment for Cloud Native
License: Mulan PSL V2
URL: https://gitee.com/openeuler/rubik
@ -30,6 +30,9 @@ cp %{SOURCE7} .

%build
sh ./apply-patch
%ifarch loongarch64
sed -i -e 's/riscv64 s390x/riscv64 s390x loong64/g' -e 's/s390x)/s390x || loong64)/g' vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go
%endif
make release
strip ./build/rubik

@ -56,7 +59,43 @@ install -Dp ./build_rubik_image.sh %{buildroot}%{_sharedstatedir}/%{name}/build_
rm -rf %{buildroot}

%changelog
* Fri May 10 2024 weiyucheng <weiyucheng@kylinos.cn> - 2.0.0-4
* Wed Nov 27 2024 vegbir <yangjiaqi16@huawei.com> - 2.0.1-2
- Type: bugfix
- CVE:NA
- SUG:restart
- DESC:remove duplicate log

* Thu Nov 14 2024 vegbir <yangjiaqi16@huawei.com> - 2.0.1-1
- Type: bugfix
- CVE:NA
- SUG:restart
- DESC:upgrade rubik version to v2.0.1

* Fri Sep 20 2024 wujing <wujing50@huawei.com> - 2.0.0-8
- Type: bugfix
- CVE:NA
- SUG:restart
- DESC:sync upstream patches

* Tue Aug 20 2024 vegbir <yangjiaqi16@huawei.com> - 2.0.0-7
- Type: bugfix
- CVE:NA
- SUG:restart
- DESC:informer add nri support

* Tue Jul 02 2024 zhangxianting <zhangxianting@uniontech.com> - 2.0.0-6
- Type:bugfix
- CVE:NA
- SUG:restart
- DESC:fix loong64 build error

* Tue May 21 2024 weiyuan <weiyuan@kylinos.cn> - 2.0.0-5
- Type: bugfix
- CVE:NA
- SUG:restart
- DESC:support crio container engine

* Thu May 09 2024 weiyucheng <weiyucheng@kylinos.cn> - 2.0.0-4
- Type:bugfix
- CVE:NA
- SUG:restart

series.conf
@ -1,12 +1,2 @@
patch/0001-Support-Labels-field-to-configure-QoSLevel.patch
patch/0002-rubik-fix-weight-for-iocost-does-not-take-effect.patch
patch/0003-rubik-test-coverage-for-PSI-Manager.patch
patch/0004-rubik-add-psi-design-documentation.patch
patch/0005-rubik-move-fssr-design-document-to-design-dir.patch
patch/0006-rubik-fix-that-value-of-memory.high_async_ratio-lost.patch
patch/0007-bugfix-fix-typos-calling-order-of-waitgroup.patch
patch/0008-rubik-test-coverage-improvement-for-pkg-config.patch
patch/0009-rubik-optimize-dynamicAdjust-to-be-clear-and-add-log.patch
patch/0010-support-isulad-container-engine.patch
patch/0011-support-systemd-cgroup-driver.patch
#end of file
patch/0001-rubik-remove-duplicate-log.patch
# end of file
BIN v2.0.0.tar.gz (deleted): Binary file not shown.
BIN v2.0.1.tar.gz (new file): Binary file not shown.