upgrade to v0.9.1-1

Signed-off-by: zhangxiaoyu <zhangxiaoyu58@huawei.com>
zhangxiaoyu 2021-08-03 15:28:17 +08:00
parent 177894a788
commit 0777151d5d
10 changed files with 7897 additions and 5 deletions

File diff suppressed because it is too large.

0002-fix-some-bugs.patch (new file, 185 lines)

From 81a91cf3834cd351128fa7efa2b3c894af0be32e Mon Sep 17 00:00:00 2001
From: zhangxiaoyu <zhangxiaoyu58@huawei.com>
Date: Tue, 20 Jul 2021 19:46:32 +0800
Subject: [PATCH 2/9] fix some bugs
Signed-off-by: zhangxiaoyu <zhangxiaoyu58@huawei.com>
---
VERSION | 2 +-
cmd/configs.go | 7 ++-
.../binary/infrastructure/infrastructure.go | 63 +++++++++----------
pkg/clusterdeployment/runtime/runtime.go | 2 +-
4 files changed, 37 insertions(+), 37 deletions(-)
diff --git a/VERSION b/VERSION
index 899f24f..f514a2f 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.9.0
\ No newline at end of file
+0.9.1
\ No newline at end of file
diff --git a/cmd/configs.go b/cmd/configs.go
index e85ba70..19a182c 100644
--- a/cmd/configs.go
+++ b/cmd/configs.go
@@ -119,9 +119,7 @@ func fillEtcdsIfNotExist(cc *deployConfig) {
return
}
- for _, h := range cc.Masters {
- cc.Etcds = append(cc.Etcds, h)
- }
+ cc.Etcds = append(cc.Etcds, cc.Masters...)
}
func loadDeployConfig(file string) (*deployConfig, error) {
@@ -234,6 +232,9 @@ func appendSoftware(software, packageConfig, defaultPackage []*api.PackageConfig
result := software
for _, p := range packages {
+ if p == nil {
+ continue
+ }
splitSoftware := strings.Split(p.Name, ",")
for _, s := range splitSoftware {
result = append(result, &api.PackageConfig{
diff --git a/pkg/clusterdeployment/binary/infrastructure/infrastructure.go b/pkg/clusterdeployment/binary/infrastructure/infrastructure.go
index 698c16a..53c07c7 100644
--- a/pkg/clusterdeployment/binary/infrastructure/infrastructure.go
+++ b/pkg/clusterdeployment/binary/infrastructure/infrastructure.go
@@ -37,7 +37,9 @@ import (
)
var (
- pmd *packageMD5 = &packageMD5{}
+ pmd *packageMD5 = &packageMD5{
+ MD5s: make(map[string]string),
+ }
)
type SetupInfraTask struct {
@@ -166,14 +168,15 @@ func copyPackage(r runner.Runner, hcg *api.HostConfig, pcfg *api.PackageSrcConfi
}
// 1. calculate package MD5
- if err := pmd.getMD5(src); err != nil {
+ md5, err := pmd.getMD5(src)
+ if err != nil {
return fmt.Errorf("get MD5 failed: %v", err)
}
// 2. package exist on remote host
file, dstDir := filepath.Base(src), pcfg.GetPkgDstPath()
dstPath := filepath.Join(dstDir, file)
- if pmd.checkMD5(r, dstPath) {
+ if checkMD5(r, md5, dstPath) {
logrus.Warnf("package already exist on remote host")
return nil
}
@@ -187,7 +190,7 @@ func copyPackage(r runner.Runner, hcg *api.HostConfig, pcfg *api.PackageSrcConfi
}
// 4. check package MD5
- if !pmd.checkMD5(r, dstPath) {
+ if !checkMD5(r, md5, dstPath) {
return fmt.Errorf("%s MD5 has changed after copy, maybe it is corrupted", file)
}
@@ -220,6 +223,17 @@ func setHostname(r runner.Runner, hcg *api.HostConfig) error {
return nil
}
+func checkMD5(r runner.Runner, md5, path string) bool {
+ output, err := r.RunCommand(fmt.Sprintf("sudo -E /bin/sh -c \"md5sum %s | awk '{print \\$1}'\"", path))
+ if err != nil {
+ logrus.Warnf("get %s MD5 failed: %v", path, err)
+ return false
+ }
+
+ logrus.Debugf("package MD5 value: local %s, remote: %s", md5, output)
+ return md5 == output
+}
+
func NodeInfrastructureSetup(config *api.ClusterConfig, nodeID string, role uint16) error {
if config == nil {
return fmt.Errorf("empty cluster config")
@@ -362,48 +376,33 @@ func NodeInfrastructureDestroy(config *api.ClusterConfig, hostconfig *api.HostCo
}
type packageMD5 struct {
- MD5 string
+ MD5s map[string]string
Lock sync.RWMutex
}
-func (pm *packageMD5) getMD5(path string) error {
+func (pm *packageMD5) getMD5(path string) (string, error) {
pm.Lock.RLock()
- md5str := pm.MD5
- pm.Lock.RUnlock()
+ defer func() {
+ pm.Lock.RUnlock()
+ }()
- if md5str != "" {
- return nil
+ md5str, ok := pm.MD5s[path]
+ if ok {
+ return md5str, nil
}
f, err := os.Open(path)
if err != nil {
- return err
+ return "", err
}
defer f.Close()
h := md5.New()
if _, err := io.Copy(h, f); err != nil {
- return err
- }
-
- pm.Lock.Lock()
- pm.MD5 = fmt.Sprintf("%x", h.Sum(nil))
- pm.Lock.Unlock()
-
- return nil
-}
-
-func (pm *packageMD5) checkMD5(r runner.Runner, path string) bool {
- pm.Lock.RLock()
- md5str := pm.MD5
- pm.Lock.RUnlock()
-
- output, err := r.RunCommand(fmt.Sprintf("sudo -E /bin/sh -c \"md5sum %s | awk '{print \\$1}'\"", path))
- if err != nil {
- logrus.Warnf("get %s MD5 failed: %v", path, err)
- return false
+ return "", err
}
+ md5str = fmt.Sprintf("%x", h.Sum(nil))
+ pm.MD5s[path] = md5str
- logrus.Debugf("package MD5 value: local %s, remote: %s", md5str, output)
- return md5str == output
+ return md5str, nil
}
diff --git a/pkg/clusterdeployment/runtime/runtime.go b/pkg/clusterdeployment/runtime/runtime.go
index 8295dd8..29fab7d 100644
--- a/pkg/clusterdeployment/runtime/runtime.go
+++ b/pkg/clusterdeployment/runtime/runtime.go
@@ -149,7 +149,7 @@ func (dr *dockerRuntime) PrepareRuntimeJson(r runner.Runner, WorkerConfig *api.W
var sb strings.Builder
jsonBase64 := base64.StdEncoding.EncodeToString([]byte(json))
- sb.WriteString(fmt.Sprintf("sudo -E /bin/sh -c \"echo %s | base64 -d > %s\"", jsonBase64, "/etc/docker/daemon.json"))
+ sb.WriteString(fmt.Sprintf("sudo -E /bin/sh -c \"mkdir -p /etc/docker && echo %s | base64 -d > %s\"", jsonBase64, "/etc/docker/daemon.json"))
_, err = r.RunCommand(sb.String())
if err != nil {
return err
--
2.25.1
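
For reference, the MD5 handling above reduces to: hash the local package once, memoize the digest per path, and compare it with the remote `md5sum` output. A minimal local-side sketch using only the Go standard library (the runner/remote half is eggo-specific and omitted; note the write lock around the map write, which patch 5 below fixes in the real code):

```go
package main

import (
	"crypto/md5"
	"encoding/hex"
	"fmt"
	"io"
	"os"
	"sync"
)

// md5Cache memoizes file digests per path, mirroring the patched
// packageMD5 struct. Map writes happen under the write lock.
type md5Cache struct {
	mu   sync.Mutex
	md5s map[string]string
}

func (c *md5Cache) get(path string) (string, error) {
	c.mu.Lock()
	defer c.mu.Unlock()
	if sum, ok := c.md5s[path]; ok {
		return sum, nil
	}
	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()
	h := md5.New()
	if _, err := io.Copy(h, f); err != nil {
		return "", err
	}
	sum := hex.EncodeToString(h.Sum(nil))
	c.md5s[path] = sum
	return sum, nil
}

func main() {
	c := &md5Cache{md5s: make(map[string]string)}
	sum, err := c.get("/etc/hostname") // any readable file works here
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	// checkMD5 compares this against `md5sum <path> | awk '{print $1}'`.
	fmt.Println(sum)
}
```

The daemon.json hunk also shows a useful trick: ship file content to the remote shell as base64, so quoting and special characters in the JSON never hit the shell. A sketch of building such a command (illustrative JSON; executing it through a runner is out of scope):

```go
package main

import (
	"encoding/base64"
	"fmt"
)

// buildWriteCmd returns a shell command that recreates content at path,
// creating the parent directory first - the shape of the patched
// PrepareRuntimeJson command.
func buildWriteCmd(dir, path, content string) string {
	b64 := base64.StdEncoding.EncodeToString([]byte(content))
	return fmt.Sprintf("sudo -E /bin/sh -c \"mkdir -p %s && echo %s | base64 -d > %s\"", dir, b64, path)
}

func main() {
	json := `{"registry-mirrors": []}`
	fmt.Println(buildWriteCmd("/etc/docker", "/etc/docker/daemon.json", json))
}
```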

New patch file (40 lines)

From 0d411aace540cc13ff9af83371e6d9e6576cafc6 Mon Sep 17 00:00:00 2001
From: haozi007 <liuhao27@huawei.com>
Date: Tue, 27 Jul 2021 12:11:01 +0100
Subject: [PATCH 3/9] use export to replace go env
Signed-off-by: haozi007 <liuhao27@huawei.com>
---
README.en.md | 2 +-
README.md | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.en.md b/README.en.md
index eb4a5e6..1a7168c 100644
--- a/README.en.md
+++ b/README.en.md
@@ -10,7 +10,7 @@ Eggo is a tool built to provide standard multi-ways for creating Kubernetes clus
```bash
# enable go mod
-$ go env -w GO111MODULE=on
+$ export GO111MODULE=on
# set goproxy
$ go env -w GOPROXY=https://goproxy.cn,direct
# download dependences
diff --git a/README.md b/README.md
index 64709a0..721370f 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ Eggo项目旨在解决大规模生产环境K8S集群自动化部署问题、部
```bash
# 使能go mod
-$ go env -w GO111MODULE=on
+$ export GO111MODULE=on
# 设置goproxy为国内代理也可以设置为其他公司的代理
$ go env -w GOPROXY=https://goproxy.cn,direct
# 下载依赖库
--
2.25.1

New patch file (625 lines)

From ab6783942ef1a76fd77b77ac2de596c702ba4607 Mon Sep 17 00:00:00 2001
From: haozi007 <liuhao27@huawei.com>
Date: Tue, 27 Jul 2021 08:30:51 +0100
Subject: [PATCH 4/9] add hooks apis for cluster
Signed-off-by: haozi007 <liuhao27@huawei.com>
---
config/openEuler.config | 4 +-
pkg/api/types.go | 10 ++
pkg/clusterdeployment/binary/binary.go | 149 ++++++++++++++++++
.../binary/commontools/runshell.go | 39 +++++
pkg/clusterdeployment/clusterdeploy.go | 48 +++---
pkg/utils/nodemanager/node.go | 8 +-
pkg/utils/nodemanager/nodemanager.go | 19 +--
pkg/utils/utils.go | 2 +-
pkg/utils/utils_test.go | 121 ++++++++++++++
9 files changed, 353 insertions(+), 47 deletions(-)
create mode 100644 pkg/clusterdeployment/binary/commontools/runshell.go
create mode 100644 pkg/utils/utils_test.go
diff --git a/config/openEuler.config b/config/openEuler.config
index eac76ff..6ced22f 100755
--- a/config/openEuler.config
+++ b/config/openEuler.config
@@ -21,8 +21,8 @@ install:
package-source:
type: tar.gz
dstpath: ""
- armsrc: /root/pacakges/pacakges-arm.tar.gz
- x86src: /root/pacakges/packages-x86.tar.gz
+ armsrc: /root/packages/packages-arm.tar.gz
+ x86src: /root/packages/packages-x86.tar.gz
container:
- name: iSulad
type: repo
diff --git a/pkg/api/types.go b/pkg/api/types.go
index 8097779..403dde3 100644
--- a/pkg/api/types.go
+++ b/pkg/api/types.go
@@ -232,6 +232,16 @@ type EtcdAPI interface {
type ClusterManagerAPI interface {
// TODO: should add other dependence cluster configurations
+ PreCreateClusterHooks() error
+ PostCreateClusterHooks() error
+ PreDeleteClusterHooks()
+ PostDeleteClusterHooks()
+
+ PreNodeJoinHooks(node *HostConfig) error
+ PostNodeJoinHooks(node *HostConfig) error
+ PreNodeCleanupHooks(node *HostConfig)
+ PostNodeCleanupHooks(node *HostConfig)
+
ClusterControlPlaneInit(node *HostConfig) error
ClusterNodeJoin(node *HostConfig) error
ClusterNodeCleanup(node *HostConfig, delType uint16) error
diff --git a/pkg/clusterdeployment/binary/binary.go b/pkg/clusterdeployment/binary/binary.go
index 40373e3..96c0dd1 100644
--- a/pkg/clusterdeployment/binary/binary.go
+++ b/pkg/clusterdeployment/binary/binary.go
@@ -22,6 +22,7 @@ import (
"isula.org/eggo/pkg/clusterdeployment/binary/addons"
"isula.org/eggo/pkg/clusterdeployment/binary/bootstrap"
"isula.org/eggo/pkg/clusterdeployment/binary/cleanupcluster"
+ "isula.org/eggo/pkg/clusterdeployment/binary/commontools"
"isula.org/eggo/pkg/clusterdeployment/binary/controlplane"
"isula.org/eggo/pkg/clusterdeployment/binary/coredns"
"isula.org/eggo/pkg/clusterdeployment/binary/etcdcluster"
@@ -29,9 +30,11 @@ import (
"isula.org/eggo/pkg/clusterdeployment/binary/loadbalance"
"isula.org/eggo/pkg/clusterdeployment/manager"
"isula.org/eggo/pkg/utils"
+ "isula.org/eggo/pkg/utils/dependency"
"isula.org/eggo/pkg/utils/kubectl"
"isula.org/eggo/pkg/utils/nodemanager"
"isula.org/eggo/pkg/utils/runner"
+ "isula.org/eggo/pkg/utils/task"
"github.com/sirupsen/logrus"
)
@@ -415,3 +418,149 @@ func (bcp *BinaryClusterDeployment) Finish() {
bcp.connections = make(map[string]runner.Runner)
logrus.Info("do finish binary deployment success")
}
+
+func (bcp *BinaryClusterDeployment) PreCreateClusterHooks() error {
+ role := []uint16{api.LoadBalance, api.ETCD, api.Master, api.Worker}
+ if err := dependency.HookSchedule(bcp.config, bcp.config.Nodes, role, api.SchedulePreJoin); err != nil {
+ return err
+ }
+ return nil
+}
+
+func (bcp *BinaryClusterDeployment) PostCreateClusterHooks() error {
+ role := []uint16{api.LoadBalance, api.ETCD, api.Master, api.Worker}
+ if err := dependency.HookSchedule(bcp.config, bcp.config.Nodes, role, api.SchedulePostJoin); err != nil {
+ return err
+ }
+
+ if err := checkK8sServices(bcp.config.Nodes); err != nil {
+ return err
+ }
+ return nil
+}
+
+func (bcp *BinaryClusterDeployment) PreDeleteClusterHooks() {
+ role := []uint16{api.Worker, api.Master, api.ETCD, api.LoadBalance}
+ if err := dependency.HookSchedule(bcp.config, bcp.config.Nodes, role, api.SchedulePreCleanup); err != nil {
+ logrus.Warnf("Ignore: Delete cluster PreHook failed: %v", err)
+ }
+}
+
+func (bcp *BinaryClusterDeployment) PostDeleteClusterHooks() {
+ role := []uint16{api.Worker, api.Master, api.ETCD, api.LoadBalance}
+ if err := dependency.HookSchedule(bcp.config, bcp.config.Nodes, role, api.SchedulePostCleanup); err != nil {
+ logrus.Warnf("Ignore: Delete cluster PostHook failed: %v", err)
+ }
+}
+
+func (bcp *BinaryClusterDeployment) PreNodeJoinHooks(node *api.HostConfig) error {
+ role := []uint16{api.Master, api.Worker, api.ETCD}
+ if err := dependency.HookSchedule(bcp.config, []*api.HostConfig{node}, role, api.SchedulePreJoin); err != nil {
+ return err
+ }
+ return nil
+}
+
+func checkWorkerServices(workers []string) error {
+ if len(workers) == 0 {
+ return nil
+ }
+ shell := `#!/bin/bash
+systemctl status kubelet | tail -20
+[[ $? -ne 0 ]] && exit 1
+systemctl status kube-proxy | tail -20
+[[ $? -ne 0 ]] && exit 1
+exit 0
+`
+ checker := task.NewTaskInstance(
+ &commontools.RunShellTask{
+ ShellName: "checkWorker",
+ Shell: shell,
+ },
+ )
+
+ return nodemanager.RunTaskOnNodes(checker, workers)
+}
+
+func checkMasterServices(masters []string) error {
+ if len(masters) == 0 {
+ return nil
+ }
+ shell := `#!/bin/bash
+systemctl status kube-apiserver | tail -20
+[[ $? -ne 0 ]] && exit 1
+systemctl status kube-controller-manager | tail -20
+[[ $? -ne 0 ]] && exit 1
+systemctl status kube-scheduler | tail -20
+[[ $? -ne 0 ]] && exit 1
+exit 0
+`
+ checker := task.NewTaskInstance(
+ &commontools.RunShellTask{
+ ShellName: "checkMaster",
+ Shell: shell,
+ },
+ )
+
+ return nodemanager.RunTaskOnNodes(checker, masters)
+}
+
+func checkK8sServices(nodes []*api.HostConfig) error {
+ var wokers, masters []string
+
+ for _, n := range nodes {
+ if utils.IsType(n.Type, api.Master) {
+ masters = append(masters, n.Address)
+ }
+ if utils.IsType(n.Type, api.Worker) {
+ wokers = append(wokers, n.Address)
+ }
+ }
+ if err := checkWorkerServices(wokers); err != nil {
+ return err
+ }
+ return checkMasterServices(masters)
+}
+
+func (bcp *BinaryClusterDeployment) PostNodeJoinHooks(node *api.HostConfig) error {
+ role := []uint16{api.Master, api.Worker, api.ETCD}
+ if err := dependency.HookSchedule(bcp.config, []*api.HostConfig{node}, role, api.SchedulePostJoin); err != nil {
+ return err
+ }
+
+ // taint and label for master node
+ roles := node.Type
+ for _, n := range bcp.config.Nodes {
+ if n.Name == node.Name {
+ roles |= n.Type
+ break
+ }
+ }
+
+ if utils.IsType(roles, (api.Master & api.Worker)) {
+ if err := taintAndLabelNode(bcp.config.Name, node.Name); err != nil {
+ return err
+ }
+ }
+
+ // check node status
+ if err := checkK8sServices([]*api.HostConfig{node}); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (bcp *BinaryClusterDeployment) PreNodeCleanupHooks(node *api.HostConfig) {
+ role := []uint16{api.Worker, api.Master, api.ETCD}
+ if err := dependency.HookSchedule(bcp.config, []*api.HostConfig{node}, role, api.SchedulePreCleanup); err != nil {
+ logrus.Warnf("Ignore: Delete Node PreHook failed: %v", err)
+ }
+}
+
+func (bcp *BinaryClusterDeployment) PostNodeCleanupHooks(node *api.HostConfig) {
+ role := []uint16{api.Worker, api.Master, api.ETCD}
+ if err := dependency.HookSchedule(bcp.config, []*api.HostConfig{node}, role, api.SchedulePostCleanup); err != nil {
+ logrus.Warnf("Ignore: Delete Node PostHook failed: %v", err)
+ }
+}
diff --git a/pkg/clusterdeployment/binary/commontools/runshell.go b/pkg/clusterdeployment/binary/commontools/runshell.go
new file mode 100644
index 0000000..266c488
--- /dev/null
+++ b/pkg/clusterdeployment/binary/commontools/runshell.go
@@ -0,0 +1,39 @@
+/******************************************************************************
+ * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
+ * eggo licensed under the Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ * http://license.coscl.org.cn/MulanPSL2
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+ * PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * Author: haozi007
+ * Create: 2021-07-27
+ * Description: util for run shell
+ ******************************************************************************/
+package commontools
+
+import (
+ "github.com/sirupsen/logrus"
+ "isula.org/eggo/pkg/api"
+ "isula.org/eggo/pkg/utils/runner"
+)
+
+type RunShellTask struct {
+ ShellName string
+ Shell string
+}
+
+func (ct *RunShellTask) Name() string {
+ return "RunShellTask"
+}
+
+func (ct *RunShellTask) Run(r runner.Runner, hcf *api.HostConfig) error {
+ out, err := r.RunShell(ct.Shell, ct.ShellName)
+ if err != nil {
+ return err
+ }
+ logrus.Debugf("run shell: %s, get out:\n%s", ct.ShellName, out)
+ return nil
+}
diff --git a/pkg/clusterdeployment/clusterdeploy.go b/pkg/clusterdeployment/clusterdeploy.go
index 353e877..e5f69f0 100644
--- a/pkg/clusterdeployment/clusterdeploy.go
+++ b/pkg/clusterdeployment/clusterdeploy.go
@@ -25,7 +25,6 @@ import (
_ "isula.org/eggo/pkg/clusterdeployment/binary"
"isula.org/eggo/pkg/clusterdeployment/manager"
"isula.org/eggo/pkg/utils"
- "isula.org/eggo/pkg/utils/dependency"
"isula.org/eggo/pkg/utils/nodemanager"
)
@@ -61,9 +60,8 @@ func doCreateCluster(handler api.ClusterDeploymentAPI, cc *api.ClusterConfig) er
}
}
- // Step2: Hook SchedulePreJoin
- role := []uint16{api.LoadBalance, api.ETCD, api.Master, api.Worker}
- if err := dependency.HookSchedule(cc, cc.Nodes, role, api.SchedulePreJoin); err != nil {
+ // Step2: run precreate cluster hooks
+ if err := handler.PreCreateClusterHooks(); err != nil {
return err
}
@@ -94,20 +92,21 @@ func doCreateCluster(handler api.ClusterDeploymentAPI, cc *api.ClusterConfig) er
return err
}
}
- //Step7: setup addons for cluster
// wait all nodes ready
if err := nodemanager.WaitNodesFinishWithProgress(joinNodeIDs, time.Minute*5); err != nil {
return err
}
+ //Step7: setup addons for cluster
if err := handler.AddonsSetup(); err != nil {
return err
}
- // Step8: Hook SchedulePostJoin
- if err := dependency.HookSchedule(cc, cc.Nodes, role, api.SchedulePostJoin); err != nil {
+ // Step8: run postcreate cluster hooks
+ if err := handler.PostCreateClusterHooks(); err != nil {
return err
}
+
allNodes := utils.GetAllIPs(cc.Nodes)
if err := nodemanager.WaitNodesFinishWithProgress(allNodes, time.Minute*5); err != nil {
return err
@@ -151,9 +150,8 @@ func doJoinNode(handler api.ClusterDeploymentAPI, cc *api.ClusterConfig, hostcon
return err
}
- // Hook SchedulePreJoin
- role := []uint16{api.Master, api.Worker, api.ETCD}
- if err := dependency.HookSchedule(cc, []*api.HostConfig{hostconfig}, role, api.SchedulePreJoin); err != nil {
+ // Pre node join Hooks
+ if err := handler.PreNodeJoinHooks(hostconfig); err != nil {
return err
}
@@ -167,8 +165,8 @@ func doJoinNode(handler api.ClusterDeploymentAPI, cc *api.ClusterConfig, hostcon
return err
}
- // Hook SchedulePostJoin
- if err := dependency.HookSchedule(cc, []*api.HostConfig{hostconfig}, role, api.SchedulePostJoin); err != nil {
+ // Post node join Hooks
+ if err := handler.PostNodeJoinHooks(hostconfig); err != nil {
return err
}
@@ -223,11 +221,8 @@ func JoinNode(cc *api.ClusterConfig, hostconfig *api.HostConfig) error {
}
func doDeleteNode(handler api.ClusterDeploymentAPI, cc *api.ClusterConfig, h *api.HostConfig) error {
- // Hook SchedulePreCleanup
- role := []uint16{api.Worker, api.Master, api.ETCD}
- if err := dependency.HookSchedule(cc, []*api.HostConfig{h}, role, api.SchedulePreCleanup); err != nil {
- return err
- }
+ // Pre node delete Hooks
+ handler.PreNodeCleanupHooks(h)
if utils.IsType(h.Type, api.Worker) {
if err := handler.ClusterNodeCleanup(h, api.Worker); err != nil {
@@ -247,10 +242,8 @@ func doDeleteNode(handler api.ClusterDeploymentAPI, cc *api.ClusterConfig, h *ap
}
}
- // Hook SchedulePostCleanup
- if err := dependency.HookSchedule(cc, []*api.HostConfig{h}, role, api.SchedulePostCleanup); err != nil {
- return err
- }
+ // Post node delete Hooks
+ handler.PostNodeCleanupHooks(h)
if err := handler.MachineInfraDestroy(h); err != nil {
logrus.Warnf("cleanup infrastructure for node: %s failed: %v", h.Name, err)
@@ -290,11 +283,8 @@ func DeleteNode(cc *api.ClusterConfig, hostconfig *api.HostConfig) error {
}
func doRemoveCluster(handler api.ClusterDeploymentAPI, cc *api.ClusterConfig) {
- // Step1: Hook SchedulePreCleanup
- role := []uint16{api.Worker, api.Master, api.ETCD, api.LoadBalance}
- if err := dependency.HookSchedule(cc, cc.Nodes, role, api.SchedulePreCleanup); err != nil {
- logrus.Errorf("Hook SchedulePreCleanup failed: %v", err)
- }
+ // Step1: Pre delete cluster Hooks
+ handler.PreDeleteClusterHooks()
// Step2: cleanup addons
err := handler.AddonsDestroy()
@@ -344,10 +334,8 @@ func doRemoveCluster(handler api.ClusterDeploymentAPI, cc *api.ClusterConfig) {
logrus.Warnf("[cluster] cleanup etcd cluster failed: %v", err)
}
- // Step7: Hook SchedulePostCleanup
- if err := dependency.HookSchedule(cc, cc.Nodes, role, api.SchedulePostCleanup); err != nil {
- logrus.Errorf("Hook SchedulePostCleanup failed: %v", err)
- }
+ // Step7: Post delete cluster Hooks
+ handler.PostDeleteClusterHooks()
// Step8: cleanup infrastructure
for _, n := range cc.Nodes {
diff --git a/pkg/utils/nodemanager/node.go b/pkg/utils/nodemanager/node.go
index b1235c7..d610e3b 100644
--- a/pkg/utils/nodemanager/node.go
+++ b/pkg/utils/nodemanager/node.go
@@ -115,13 +115,15 @@ func (n *Node) updateNodeStatus(message string, status int) {
}
}
-func (n *Node) PushTask(task task.Task) bool {
- if n.status.HasError() {
+func (n *Node) PushTask(t task.Task) bool {
+ // only run ignore error tasks to cleanup node
+ if n.status.HasError() && task.IsIgnoreError(t) {
logrus.Debugf("node finished with error: %v", n.status.Message)
return false
}
+
select {
- case n.queue <- task:
+ case n.queue <- t:
n.updateTotalCnt()
return true
default:
diff --git a/pkg/utils/nodemanager/nodemanager.go b/pkg/utils/nodemanager/nodemanager.go
index fc3f1ee..b135890 100644
--- a/pkg/utils/nodemanager/nodemanager.go
+++ b/pkg/utils/nodemanager/nodemanager.go
@@ -92,8 +92,8 @@ func doRetryPushTask(t task.Task, retryNodes []*Node) error {
}
func RunTaskOnNodes(t task.Task, nodes []string) error {
- manager.lock.RLock()
- defer manager.lock.RUnlock()
+ manager.lock.Lock()
+ defer manager.lock.Unlock()
var retryNodes []*Node
for _, id := range nodes {
if n, ok := manager.nodes[id]; ok {
@@ -112,8 +112,8 @@ func RunTaskOnNodes(t task.Task, nodes []string) error {
func RunTaskOnAll(t task.Task) error {
var retryNodes []*Node
- manager.lock.RLock()
- defer manager.lock.RUnlock()
+ manager.lock.Lock()
+ defer manager.lock.Unlock()
for id, n := range manager.nodes {
if n.PushTask(t) {
continue
@@ -126,8 +126,8 @@ func RunTaskOnAll(t task.Task) error {
}
func RunTasksOnNode(tasks []task.Task, node string) error {
- manager.lock.RLock()
- defer manager.lock.RUnlock()
+ manager.lock.Lock()
+ defer manager.lock.Unlock()
for _, t := range tasks {
if n, ok := manager.nodes[node]; ok {
@@ -152,9 +152,6 @@ func RunTasksOnNode(tasks []task.Task, node string) error {
}
func RunTasksOnNodes(tasks []task.Task, nodes []string) error {
- manager.lock.RLock()
- defer manager.lock.RUnlock()
-
for _, n := range nodes {
if err := RunTasksOnNode(tasks, n); err != nil {
logrus.Errorf("run tasks on node %s failed: %v", n, err)
@@ -166,8 +163,8 @@ func RunTasksOnNodes(tasks []task.Task, nodes []string) error {
}
func RunTaskOnOneNode(t task.Task, nodes []string) (string, error) {
- manager.lock.RLock()
- defer manager.lock.RUnlock()
+ manager.lock.Lock()
+ defer manager.lock.Unlock()
for _, id := range nodes {
n, ok := manager.nodes[id]
diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go
index 86be713..72f8777 100644
--- a/pkg/utils/utils.go
+++ b/pkg/utils/utils.go
@@ -51,7 +51,7 @@ func GetEggoDir() string {
}
func IsType(curType uint16, expectedType uint16) bool {
- return curType&expectedType != 0
+ return (curType & expectedType) == expectedType
}
func AddSudo(cmd string) string {
diff --git a/pkg/utils/utils_test.go b/pkg/utils/utils_test.go
new file mode 100644
index 0000000..df3743f
--- /dev/null
+++ b/pkg/utils/utils_test.go
@@ -0,0 +1,121 @@
+/******************************************************************************
+ * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
+ * eggo licensed under the Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ * http://license.coscl.org.cn/MulanPSL2
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+ * PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * Author: zhangxiaoyu
+ * Create: 2021-05-19
+ * Description: eggo utils ut
+ ******************************************************************************/
+
+package utils
+
+import (
+ "sort"
+ "testing"
+)
+
+func TestIsType(t *testing.T) {
+ cs := []struct {
+ name string
+ val1 uint16
+ val2 uint16
+ expect bool
+ }{
+ {
+ "test1",
+ 1,
+ 1,
+ true,
+ },
+ {
+ "test2",
+ 7,
+ 7,
+ true,
+ },
+ {
+ "test3",
+ 123,
+ 123,
+ true,
+ },
+ {
+ "test11",
+ 8,
+ 1,
+ false,
+ },
+ {
+ "test12",
+ 0,
+ 1,
+ false,
+ },
+ {
+ "test13",
+ 120,
+ 121,
+ false,
+ },
+ }
+
+ for _, c := range cs {
+ if IsType(c.val1, c.val2) != c.expect {
+ t.Errorf("case: %s, expect: %v, get: %v", c.name, c.expect, IsType(c.val1, c.val2))
+ }
+ }
+}
+
+func TestRemoveDupString(t *testing.T) {
+ cs := []struct {
+ name string
+ val []string
+ expect []string
+ }{
+ {
+ "test1",
+ []string{"abc", "bcd"},
+ []string{"abc", "bcd"},
+ },
+ {
+ "test2",
+ []string{"abc", "bcd", "abc"},
+ []string{"abc", "bcd"},
+ },
+ {
+ "test3",
+ []string{"xxx", "bcd"},
+ []string{"bcd", "xxx"},
+ },
+ {
+ "test4",
+ []string{"xxx", "xxx", "xxx"},
+ []string{"xxx"},
+ },
+ }
+
+ for _, c := range cs {
+ flag := true
+ got := RemoveDupString(c.val)
+ if len(got) == len(c.expect) {
+ sort.Strings(got)
+ sort.Strings(c.expect)
+ for i := 0; i < len(got); i++ {
+ if got[i] != c.expect[i] {
+ flag = false
+ }
+ }
+ } else {
+ flag = false
+ }
+ if !flag {
+ t.Errorf("case: %s, expect: %v, get: %v", c.name, c.expect, got)
+ }
+ }
+}
--
2.25.1
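
A note on the IsType change buried in this patch: the old `curType&expectedType != 0` is true when any expected bit is set, while the new `(curType & expectedType) == expectedType` requires all of them, which is what the combined master-plus-worker checks rely on. A small demo with hypothetical role bits (the real values live in eggo's api package; clearType previews the helper patch 7 adds):

```go
package main

import "fmt"

// Hypothetical role bits - stand-ins for eggo's api constants.
const (
	Master      uint16 = 1 << 0
	Worker      uint16 = 1 << 1
	ETCD        uint16 = 1 << 2
	LoadBalance uint16 = 1 << 3
)

// anyType: the old IsType - true if at least one expected bit is set.
func anyType(cur, expected uint16) bool { return cur&expected != 0 }

// allTypes: the new IsType - true only if every expected bit is set.
func allTypes(cur, expected uint16) bool { return (cur & expected) == expected }

// clearType: drops the given bits (&^ is Go's AND NOT operator).
func clearType(cur, clear uint16) uint16 { return cur &^ clear }

func main() {
	node := Master | ETCD // a master that is not a worker

	fmt.Println(anyType(node, Master|Worker))  // true: the Master bit matches
	fmt.Println(allTypes(node, Master|Worker)) // false: the Worker bit is missing

	// after the control plane joins as a worker, patch 7 strips Master:
	fmt.Println(clearType(node, Master) == ETCD) // true
}
```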

New patch file (126 lines)

From 70cee4a328f14c10daa2c8a49e846565bd7f52b0 Mon Sep 17 00:00:00 2001
From: zhangxiaoyu <zhangxiaoyu58@huawei.com>
Date: Wed, 28 Jul 2021 15:39:49 +0800
Subject: [PATCH 5/9] add ip/name for hosts
Signed-off-by: zhangxiaoyu <zhangxiaoyu58@huawei.com>
---
.../binary/infrastructure/infrastructure.go | 76 ++++++++++++++++---
1 file changed, 67 insertions(+), 9 deletions(-)
diff --git a/pkg/clusterdeployment/binary/infrastructure/infrastructure.go b/pkg/clusterdeployment/binary/infrastructure/infrastructure.go
index 53c07c7..bf6ed3a 100644
--- a/pkg/clusterdeployment/binary/infrastructure/infrastructure.go
+++ b/pkg/clusterdeployment/binary/infrastructure/infrastructure.go
@@ -72,8 +72,8 @@ func (it *SetupInfraTask) Run(r runner.Runner, hcg *api.HostConfig) error {
return err
}
- if err := setHostname(r, hcg); err != nil {
- logrus.Errorf("set hostname failed: %v", err)
+ if err := addHostNameIP(r, hcg); err != nil {
+ logrus.Errorf("add host name ip failed: %v", err)
return err
}
@@ -209,15 +209,69 @@ func copyPackage(r runner.Runner, hcg *api.HostConfig, pcfg *api.PackageSrcConfi
return nil
}
-func setHostname(r runner.Runner, hcg *api.HostConfig) error {
- if hcg.Name == "" {
- logrus.Warnf("no name for %s", hcg.Address)
+func addHostNameIP(r runner.Runner, hcg *api.HostConfig) error {
+ shell := `
+#!/bin/bash
+cat /etc/hosts | grep "{{ .Address }}" | grep "{{ .Name }}"
+if [ $? -eq 0 ]; then
+ exit 0
+fi
+
+echo "{{ .Address }} {{ .Name }}" >> /etc/hosts
+exit 0
+`
+
+ if hcg.Name == "" || hcg.Address == "" {
+ logrus.Warnf("no name or address")
+ return nil
+ }
+
+ datastore := make(map[string]interface{})
+ datastore["Address"] = hcg.Address
+ datastore["Name"] = hcg.Name
+
+ cmdStr, err := template.TemplateRender(shell, datastore)
+ if err != nil {
+ return err
+ }
+
+ _, err = r.RunShell(cmdStr, "addHostNameIP")
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func removeHostNameIP(r runner.Runner, hcg *api.HostConfig) error {
+ shell := `
+#!/bin/bash
+cat /etc/hosts | grep "{{ .Address }}" | grep "{{ .Name }}"
+if [ $? -ne 0 ]; then
+ exit 0
+fi
+
+sed -i '/{{ .Address }} {{ .Name }}/d' /etc/hosts
+exit 0
+`
+
+ if hcg.Name == "" || hcg.Address == "" {
+ logrus.Warnf("no name or address")
return nil
}
- _, err := r.RunCommand(fmt.Sprintf("sudo -E /bin/sh -c \"hostnamectl set-hostname %s\"", hcg.Name))
+ datastore := make(map[string]interface{})
+ datastore["Address"] = hcg.Address
+ datastore["Name"] = hcg.Name
+
+ cmdStr, err := template.TemplateRender(shell, datastore)
if err != nil {
- return fmt.Errorf("set Hostname %s for %s failed: %v", hcg.Name, hcg.Address, err)
+ return err
+ }
+
+ _, err = r.RunShell(cmdStr, "removeHostNameIP")
+ if err != nil {
+ return err
}
return nil
@@ -273,6 +327,10 @@ func (it *DestroyInfraTask) Run(r runner.Runner, hcg *api.HostConfig) error {
dependency.RemoveDependency(r, it.roleInfra, hcg, it.packageSrc.GetPkgDstPath())
+ if err := removeHostNameIP(r, hcg); err != nil {
+ logrus.Errorf("remove host name ip failed: %v", err)
+ }
+
removeFirewallPort(r, it.roleInfra.OpenPorts)
cleanupcluster.PostCleanup(r)
@@ -381,9 +439,9 @@ type packageMD5 struct {
}
func (pm *packageMD5) getMD5(path string) (string, error) {
- pm.Lock.RLock()
+ pm.Lock.Lock()
defer func() {
- pm.Lock.RUnlock()
+ pm.Lock.Unlock()
}()
md5str, ok := pm.MD5s[path]
--
2.25.1

New patch file (67 lines)

From f0f5461862287cc3242db5194c38d17e807c9124 Mon Sep 17 00:00:00 2001
From: haozi007 <liuhao27@huawei.com>
Date: Wed, 28 Jul 2021 08:20:14 +0100
Subject: [PATCH 6/9] remove unuseful code
Signed-off-by: haozi007 <liuhao27@huawei.com>
---
pkg/api/types.go | 1 -
pkg/clusterdeployment/binary/binary.go | 4 ----
pkg/clusterdeployment/clusterdeploy.go | 14 --------------
3 files changed, 19 deletions(-)
diff --git a/pkg/api/types.go b/pkg/api/types.go
index 403dde3..71f4f14 100644
--- a/pkg/api/types.go
+++ b/pkg/api/types.go
@@ -249,7 +249,6 @@ type ClusterManagerAPI interface {
ClusterStatus() (*ClusterStatus, error)
AddonsSetup() error
AddonsDestroy() error
- TaintAndLabelNode(name string) error
}
type LoadBalancerAPI interface {
diff --git a/pkg/clusterdeployment/binary/binary.go b/pkg/clusterdeployment/binary/binary.go
index 96c0dd1..cc6a6a0 100644
--- a/pkg/clusterdeployment/binary/binary.go
+++ b/pkg/clusterdeployment/binary/binary.go
@@ -139,10 +139,6 @@ func taintAndLabelNode(clusterID string, name string) error {
return nil
}
-func (bcp *BinaryClusterDeployment) TaintAndLabelNode(name string) error {
- return taintAndLabelNode(bcp.config.Name, name)
-}
-
func (bcp *BinaryClusterDeployment) taintAndLabelMasterNodes() error {
for _, node := range bcp.config.Nodes {
if (node.Type&api.Master != 0) && (node.Type&api.Worker != 0) {
diff --git a/pkg/clusterdeployment/clusterdeploy.go b/pkg/clusterdeployment/clusterdeploy.go
index e5f69f0..016da2b 100644
--- a/pkg/clusterdeployment/clusterdeploy.go
+++ b/pkg/clusterdeployment/clusterdeploy.go
@@ -175,20 +175,6 @@ func doJoinNode(handler api.ClusterDeploymentAPI, cc *api.ClusterConfig, hostcon
return err
}
- roles := hostconfig.Type
- for _, node := range cc.Nodes {
- if node.Name == hostconfig.Name {
- roles |= node.Type
- break
- }
- }
-
- if utils.IsType(roles, api.Master) && utils.IsType(roles, api.Worker) {
- if err := handler.TaintAndLabelNode(hostconfig.Name); err != nil {
- return err
- }
- }
-
return nil
}
--
2.25.1

New patch file (569 lines)

From 9fd05bda9c5789bee45de142468fec0227929927 Mon Sep 17 00:00:00 2001
From: haozi007 <liuhao27@huawei.com>
Date: Thu, 29 Jul 2021 12:04:58 +0100
Subject: [PATCH 7/9] support rollback of create cluster
1. rollback if failed of create cluster;
2. support partial success of create cluster;
Signed-off-by: haozi007 <liuhao27@huawei.com>
---
cmd/deploy.go | 66 +++++-
pkg/api/tools.go | 41 ++++
pkg/api/types.go | 7 +
pkg/clusterdeployment/binary/binary.go | 3 +-
.../binary/cleanupcluster/cleanupnode.go | 4 +-
pkg/clusterdeployment/clusterdeploy.go | 208 +++++++++++++-----
pkg/utils/nodemanager/node.go | 2 +-
pkg/utils/nodemanager/nodemanager.go | 27 ++-
pkg/utils/utils.go | 4 +
9 files changed, 292 insertions(+), 70 deletions(-)
diff --git a/cmd/deploy.go b/cmd/deploy.go
index 3aba440..12fdd8e 100644
--- a/cmd/deploy.go
+++ b/cmd/deploy.go
@@ -24,8 +24,64 @@ import (
"isula.org/eggo/pkg/clusterdeployment"
)
-func deploy(ccfg *api.ClusterConfig) error {
- return clusterdeployment.CreateCluster(ccfg)
+func deploy(conf *deployConfig) error {
+ if err := saveDeployConfig(conf, savedDeployConfigPath(conf.ClusterID)); err != nil {
+ return fmt.Errorf("save deploy config failed: %v", err)
+ }
+
+ ccfg := toClusterdeploymentConfig(conf)
+
+ cstatus, err := clusterdeployment.CreateCluster(ccfg)
+ if err != nil {
+ return err
+ }
+
+ if cstatus.FailureCnt > 0 {
+ // if partial success, just update config of cluster, remove failed nodes
+ var tmp []*HostConfig
+ for _, n := range conf.Masters {
+ if success, ok := cstatus.StatusOfNodes[n.Ip]; ok && !success {
+ continue
+ }
+ tmp = append(tmp, n)
+ }
+ conf.Masters = tmp
+
+ tmp = nil
+ for _, n := range conf.Workers {
+ if success, ok := cstatus.StatusOfNodes[n.Ip]; ok && !success {
+ continue
+ }
+ tmp = append(tmp, n)
+ }
+ conf.Workers = tmp
+
+ tmp = nil
+ for _, n := range conf.Etcds {
+ if success, ok := cstatus.StatusOfNodes[n.Ip]; ok && !success {
+ continue
+ }
+ tmp = append(tmp, n)
+ }
+ conf.Etcds = tmp
+
+ err = saveDeployConfig(conf, savedDeployConfigPath(conf.ClusterID))
+ if err != nil {
+ fmt.Printf("")
+ clusterdeployment.RemoveCluster(ccfg)
+ return fmt.Errorf("update config of cluster failed: %v", err)
+ }
+ fmt.Printf("update config of cluster: %s", conf.ClusterID)
+ }
+
+ fmt.Print(cstatus.Show())
+
+ if cstatus.Working {
+ fmt.Printf("To start using cluster: %s, you need following as a regular user:\n\n", ccfg.Name)
+ fmt.Printf("\texport KUBECONFIG=%s/admin.conf\n\n", api.GetClusterHomePath(ccfg.Name))
+ }
+
+ return err
}
func deployCluster(cmd *cobra.Command, args []string) error {
@@ -40,11 +96,7 @@ func deployCluster(cmd *cobra.Command, args []string) error {
// TODO: make sure config valid
- if err := saveDeployConfig(conf, savedDeployConfigPath(conf.ClusterID)); err != nil {
- return fmt.Errorf("save deploy config failed: %v", err)
- }
-
- if err := deploy(toClusterdeploymentConfig(conf)); err != nil {
+ if err := deploy(conf); err != nil {
return err
}
diff --git a/pkg/api/tools.go b/pkg/api/tools.go
index a73b9ea..2e4fb4c 100644
--- a/pkg/api/tools.go
+++ b/pkg/api/tools.go
@@ -7,6 +7,7 @@ import (
"github.com/sirupsen/logrus"
"isula.org/eggo/pkg/constants"
+ "k8s.io/apimachinery/pkg/util/json"
)
var (
@@ -80,3 +81,43 @@ func GetEtcdServers(ecc *EtcdClusterConfig) string {
func IsCleanupSchedule(schedule Schedule) bool {
return schedule == SchedulePreCleanup || schedule == SchedulePostCleanup
}
+
+func (hc HostConfig) DeepCopy() (*HostConfig, error) {
+ b, err := json.Marshal(hc)
+ if err != nil {
+ return nil, err
+ }
+ var result HostConfig
+ err = json.Unmarshal(b, &result)
+ return &result, err
+}
+
+func (cs *ClusterStatus) Show() string {
+ var sb strings.Builder
+ var fb strings.Builder
+
+ sb.WriteString("-------------------------------\n")
+ sb.WriteString("message: ")
+ sb.WriteString(cs.Message)
+ sb.WriteString("\nsummary: \n")
+ if cs.Working {
+ sb.WriteString(cs.ControlPlane)
+ sb.WriteString("\t\tsuccess")
+ sb.WriteString("\n")
+ }
+ for ip, ok := range cs.StatusOfNodes {
+ if ok {
+ sb.WriteString(ip)
+ sb.WriteString("\t\tsuccess")
+ sb.WriteString("\n")
+ } else {
+ fb.WriteString(ip)
+ fb.WriteString("\t\tfailure")
+ fb.WriteString("\n")
+ }
+ }
+ sb.WriteString(fb.String())
+ sb.WriteString("-------------------------------\n")
+
+ return sb.String()
+}
diff --git a/pkg/api/types.go b/pkg/api/types.go
index 71f4f14..86da853 100644
--- a/pkg/api/types.go
+++ b/pkg/api/types.go
@@ -215,7 +215,14 @@ type ClusterConfig struct {
}
type ClusterStatus struct {
+ Message string `json:"message"`
+ ControlPlane string `json:"controlplane"`
+ Working bool `json:"working"`
+ StatusOfNodes map[string]bool `json:"statusOfNodes"`
+ SuccessCnt uint32 `json:"successCnt"`
+ FailureCnt uint32 `json:"failureCnt"`
}
+
type InfrastructureAPI interface {
// TODO: should add other dependence cluster configurations
MachineInfraSetup(machine *HostConfig) error
diff --git a/pkg/clusterdeployment/binary/binary.go b/pkg/clusterdeployment/binary/binary.go
index cc6a6a0..dd260b0 100644
--- a/pkg/clusterdeployment/binary/binary.go
+++ b/pkg/clusterdeployment/binary/binary.go
@@ -533,7 +533,8 @@ func (bcp *BinaryClusterDeployment) PostNodeJoinHooks(node *api.HostConfig) erro
}
}
- if utils.IsType(roles, (api.Master & api.Worker)) {
+ // check whether the node is worker and master
+ if utils.IsType(roles, (api.Master | api.Worker)) {
if err := taintAndLabelNode(bcp.config.Name, node.Name); err != nil {
return err
}
diff --git a/pkg/clusterdeployment/binary/cleanupcluster/cleanupnode.go b/pkg/clusterdeployment/binary/cleanupcluster/cleanupnode.go
index 76cd7ce..da0488a 100644
--- a/pkg/clusterdeployment/binary/cleanupcluster/cleanupnode.go
+++ b/pkg/clusterdeployment/binary/cleanupcluster/cleanupnode.go
@@ -232,7 +232,6 @@ func execRemoveWorkerTask(conf *api.ClusterConfig, hostconfig *api.HostConfig) e
task.SetIgnoreErrorFlag(taskRemoveWorker)
if err := nodemanager.RunTaskOnNodes(taskRemoveWorker, []string{master}); err != nil {
- logrus.Errorf("run task for remove worker failed: %v", err)
return err
}
@@ -246,8 +245,7 @@ func CleanupNode(conf *api.ClusterConfig, hostconfig *api.HostConfig, delType ui
if utils.IsType(delType, api.Worker) {
if err := execRemoveWorkerTask(conf, hostconfig); err != nil {
- logrus.Errorf("remove workers failed: %v", err)
- return err
+ logrus.Warnf("ignore: remove workers failed: %v", err)
}
}
diff --git a/pkg/clusterdeployment/clusterdeploy.go b/pkg/clusterdeployment/clusterdeploy.go
index 016da2b..c4e0880 100644
--- a/pkg/clusterdeployment/clusterdeploy.go
+++ b/pkg/clusterdeployment/clusterdeploy.go
@@ -28,121 +28,215 @@ import (
"isula.org/eggo/pkg/utils/nodemanager"
)
-func doCreateCluster(handler api.ClusterDeploymentAPI, cc *api.ClusterConfig) error {
- var loadbalancer *api.HostConfig
- var controlPlane *api.HostConfig
- var joinNodes []*api.HostConfig
- var joinNodeIDs []string
+func splitNodes(nodes []*api.HostConfig) (*api.HostConfig, []*api.HostConfig, []*api.HostConfig, []string) {
+ var lb *api.HostConfig
+ var masters []*api.HostConfig
+ var workers []*api.HostConfig
var etcdNodes []string
- // Step1: setup infrastructure for all nodes in the cluster
- for _, n := range cc.Nodes {
- if err := handler.MachineInfraSetup(n); err != nil {
- return err
- }
+
+ for _, n := range nodes {
if utils.IsType(n.Type, api.LoadBalance) {
- loadbalancer = n
+ lb = n
}
if utils.IsType(n.Type, api.ETCD) {
etcdNodes = append(etcdNodes, n.Address)
}
+
+ if utils.IsType(n.Type, api.Master) {
+ masters = append(masters, n)
+ // node with master and worker, just put into masters
+ continue
+ }
+
if utils.IsType(n.Type, api.Worker) {
- joinNodes = append(joinNodes, n)
- joinNodeIDs = append(joinNodeIDs, n.Address)
+ workers = append(workers, n)
}
+ }
- if utils.IsType(n.Type, api.Master) {
- if controlPlane == nil {
- controlPlane = n
- } else {
- joinNodes = append(joinNodes, n)
- joinNodeIDs = append(joinNodeIDs, n.Address)
- }
+ return lb, masters, workers, etcdNodes
+}
+
+func doJoinNodeOfCluster(handler api.ClusterDeploymentAPI, cc *api.ClusterConfig, masters, workers []*api.HostConfig) ([]string, []*api.HostConfig, error) {
+ var joinedNodeIDs []string
+ var failedNodes []*api.HostConfig
+ for _, node := range workers {
+ if err := handler.ClusterNodeJoin(node); err != nil {
+ failedNodes = append(failedNodes, node)
+ continue
+ }
+ joinedNodeIDs = append(joinedNodeIDs, node.Address)
+ }
+ for _, node := range masters {
+ if err := handler.ClusterNodeJoin(node); err != nil {
+ failedNodes = append(failedNodes, node)
+ continue
+ }
+ joinedNodeIDs = append(joinedNodeIDs, node.Address)
+ }
+ // wait all nodes ready
+ if err := nodemanager.WaitNodesFinishWithProgress(joinedNodeIDs, time.Minute*5); err != nil {
+ tFailedNodes, successNodes := nodemanager.CheckNodesStatus(joinedNodeIDs)
+ // update joined and failed nodes
+ failedNodes = append(failedNodes, tFailedNodes...)
+ joinedNodeIDs = successNodes
+ if len(successNodes) == 0 {
+ return joinedNodeIDs, nil, err
+ }
+ logrus.Warnf("wait some node to complete join failed: %v", err)
+ }
+
+ return joinedNodeIDs, failedNodes, nil
+}
+
+func doCreateCluster(handler api.ClusterDeploymentAPI, cc *api.ClusterConfig, cstatus *api.ClusterStatus) ([]*api.HostConfig, error) {
+ loadbalancer, masters, workers, etcdNodes := splitNodes(cc.Nodes)
+
+ if len(masters) == 0 {
+ return nil, fmt.Errorf("no master found")
+ }
+ controlPlaneNode, err := masters[0].DeepCopy()
+ if err != nil {
+ return nil, err
+ }
+ cstatus.ControlPlane = controlPlaneNode.Address
+ masters = masters[1:]
+
+ // Step1: setup infrastructure for all nodes in the cluster
+ for _, n := range cc.Nodes {
+ if err = handler.MachineInfraSetup(n); err != nil {
+ return nil, err
}
}
// Step2: run precreate cluster hooks
- if err := handler.PreCreateClusterHooks(); err != nil {
- return err
+ if err = handler.PreCreateClusterHooks(); err != nil {
+ return nil, err
}
// Step3: setup etcd cluster
// wait infrastructure task success on nodes of etcd cluster
- if err := nodemanager.WaitNodesFinishWithProgress(etcdNodes, time.Minute*5); err != nil {
- return err
+ if err = nodemanager.WaitNodesFinishWithProgress(etcdNodes, time.Minute*5); err != nil {
+ return nil, err
}
- if err := handler.EtcdClusterSetup(); err != nil {
- return err
+ if err = handler.EtcdClusterSetup(); err != nil {
+ return nil, err
}
+
// Step4: setup loadbalance for cluster
- if err := handler.LoadBalancerSetup(loadbalancer); err != nil {
- return err
+ if err = handler.LoadBalancerSetup(loadbalancer); err != nil {
+ return nil, err
}
+
// Step5: setup control plane for cluster
- if err := handler.ClusterControlPlaneInit(controlPlane); err != nil {
- return err
+ if err = handler.ClusterControlPlaneInit(controlPlaneNode); err != nil {
+ return nil, err
}
// wait controlplane setup task success
- if err := nodemanager.WaitNodesFinish([]string{controlPlane.Address}, time.Minute*5); err != nil {
- return err
+ if err = nodemanager.WaitNodesFinish([]string{controlPlaneNode.Address}, time.Minute*5); err != nil {
+ return nil, err
}
-
- //Step6: setup left nodes for cluster
- for _, node := range joinNodes {
- if err := handler.ClusterNodeJoin(node); err != nil {
- return err
+ if utils.IsType(controlPlaneNode.Type, api.Worker) {
+ controlPlaneNode.Type = utils.ClearType(controlPlaneNode.Type, api.Master)
+ if err = handler.ClusterNodeJoin(controlPlaneNode); err != nil {
+ return nil, err
}
}
- // wait all nodes ready
- if err := nodemanager.WaitNodesFinishWithProgress(joinNodeIDs, time.Minute*5); err != nil {
- return err
+
+ //Step6: setup left nodes for cluster
+ joinedNodeIDs, failedNodes, err := doJoinNodeOfCluster(handler, cc, masters, workers)
+ if err != nil {
+ return nil, err
}
//Step7: setup addons for cluster
- if err := handler.AddonsSetup(); err != nil {
- return err
+ if err = handler.AddonsSetup(); err != nil {
+ return nil, err
}
// Step8: run postcreate cluster hooks
- if err := handler.PostCreateClusterHooks(); err != nil {
- return err
+ if err = handler.PostCreateClusterHooks(); err != nil {
+ return nil, err
}
- allNodes := utils.GetAllIPs(cc.Nodes)
- if err := nodemanager.WaitNodesFinishWithProgress(allNodes, time.Minute*5); err != nil {
- return err
+ if err = nodemanager.WaitNodesFinishWithProgress(append(joinedNodeIDs, controlPlaneNode.Address), time.Minute*5); err != nil {
+ return nil, err
}
- return nil
+ for _, sid := range joinedNodeIDs {
+ cstatus.StatusOfNodes[sid] = true
+ cstatus.SuccessCnt += 1
+ }
+ cstatus.Working = true
+
+ return failedNodes, nil
}
-func CreateCluster(cc *api.ClusterConfig) error {
+func rollbackFailedNoeds(handler api.ClusterDeploymentAPI, nodes []*api.HostConfig) {
+ if nodes == nil {
+ return
+ }
+ var rollIDs []string
+ for _, n := range nodes {
+ // do best to cleanup, if error, just ignore
+ handler.ClusterNodeCleanup(n, n.Type)
+ handler.MachineInfraDestroy(n)
+ rollIDs = append(rollIDs, n.Address)
+ }
+
+ if err := nodemanager.WaitNodesFinishWithProgress(rollIDs, time.Minute*5); err != nil {
+ logrus.Warnf("rollback failed: %v", err)
+ }
+}
+
+func CreateCluster(cc *api.ClusterConfig) (api.ClusterStatus, error) {
+ cstatus := api.ClusterStatus{
+ StatusOfNodes: make(map[string]bool),
+ }
if cc == nil {
- return fmt.Errorf("[cluster] cluster config is required")
+ return cstatus, fmt.Errorf("[cluster] cluster config is required")
}
creator, err := manager.GetClusterDeploymentDriver(cc.DeployDriver)
if err != nil {
logrus.Errorf("[cluster] get cluster deployment driver: %s failed: %v", cc.DeployDriver, err)
- return err
+ return cstatus, err
}
handler, err := creator(cc)
if err != nil {
logrus.Errorf("[cluster] create cluster deployment instance with driver: %s, failed: %v", cc.DeployDriver, err)
- return err
+ return cstatus, err
}
defer handler.Finish()
// prepare eggo config directory
if err := os.MkdirAll(api.GetClusterHomePath(cc.Name), 0750); err != nil {
- return err
+ return cstatus, err
}
- if err := doCreateCluster(handler, cc); err != nil {
- return err
+ failedNodes, err := doCreateCluster(handler, cc, &cstatus)
+ if err != nil {
+ logrus.Warnf("rollback cluster: %s", cc.Name)
+ doRemoveCluster(handler, cc)
+ cstatus.Message = err.Error()
+ return cstatus, err
+ }
+ // rollback failed nodes
+ rollbackFailedNoeds(handler, failedNodes)
+ // update status of cluster
+ if failedNodes != nil {
+ var failureIDs []string
+ for _, fid := range failedNodes {
+ failureIDs = append(failureIDs, fid.Address)
+ cstatus.StatusOfNodes[fid.Address] = false
+ cstatus.FailureCnt += 1
+ }
+ logrus.Warnf("[cluster] failed nodes: %v", failureIDs)
+ cstatus.Message = "partial success of create cluster"
+ return cstatus, nil
}
- logrus.Infof("[cluster] create cluster '%s' successed", cc.Name)
- return nil
+ cstatus.Message = "create cluster success"
+ return cstatus, nil
}
func doJoinNode(handler api.ClusterDeploymentAPI, cc *api.ClusterConfig, hostconfig *api.HostConfig) error {
diff --git a/pkg/utils/nodemanager/node.go b/pkg/utils/nodemanager/node.go
index d610e3b..a19d2e7 100644
--- a/pkg/utils/nodemanager/node.go
+++ b/pkg/utils/nodemanager/node.go
@@ -117,7 +117,7 @@ func (n *Node) updateNodeStatus(message string, status int) {
func (n *Node) PushTask(t task.Task) bool {
// only run ignore error tasks to cleanup node
- if n.status.HasError() && task.IsIgnoreError(t) {
+ if n.status.HasError() && !task.IsIgnoreError(t) {
logrus.Debugf("node finished with error: %v", n.status.Message)
return false
}
diff --git a/pkg/utils/nodemanager/nodemanager.go b/pkg/utils/nodemanager/nodemanager.go
index b135890..25f7d4e 100644
--- a/pkg/utils/nodemanager/nodemanager.go
+++ b/pkg/utils/nodemanager/nodemanager.go
@@ -37,6 +37,28 @@ var manager = &NodeManager{
nodes: make(map[string]*Node, 2),
}
+// return: key is node IP; value true is failed, false is success
+func CheckNodesStatus(checkNodes []string) ([]*api.HostConfig, []string) {
+ var failures []*api.HostConfig
+ var success []string
+ manager.lock.RLock()
+ defer manager.lock.RUnlock()
+ for _, cn := range checkNodes {
+ n, ok := manager.nodes[cn]
+ if !ok {
+ failures = append(failures, n.host)
+ continue
+ }
+ if n.GetStatus().HasError() {
+ failures = append(failures, n.host)
+ } else {
+ success = append(success, cn)
+ }
+ }
+
+ return failures, success
+}
+
func RegisterNode(hcf *api.HostConfig, r runner.Runner) error {
manager.lock.Lock()
defer manager.lock.Unlock()
@@ -227,7 +249,10 @@ outfor:
}
logrus.Infof("Tasks progress: %s", sb.String())
unfinishedNodes = nextUnfinished
- time.Sleep(time.Second)
+
+ // sleep time depend on count of wait nodes
+ st := len(unfinishedNodes) + 1
+ time.Sleep(time.Second * time.Duration(st))
}
}
diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go
index 72f8777..84ec0ea 100644
--- a/pkg/utils/utils.go
+++ b/pkg/utils/utils.go
@@ -54,6 +54,10 @@ func IsType(curType uint16, expectedType uint16) bool {
return (curType & expectedType) == expectedType
}
+func ClearType(curType uint16, clearType uint16) uint16 {
+ return (curType & ^clearType)
+}
+
func AddSudo(cmd string) string {
return "sudo -E /bin/sh -c \"" + cmd + "\""
}
--
2.25.1
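
HostConfig.DeepCopy above uses a marshal/unmarshal round trip - a common Go shortcut for deep-copying a struct whose exported fields are all JSON-serializable (unexported fields would be silently dropped). The patch imports json from k8s.io/apimachinery, but stdlib encoding/json behaves the same for this purpose; a self-contained version of the idea:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// HostConfig is a pared-down stand-in for eggo's api.HostConfig.
type HostConfig struct {
	Name    string `json:"name"`
	Address string `json:"address"`
	Type    uint16 `json:"type"`
	Ports   []int  `json:"ports"`
}

// DeepCopy clones the struct via a JSON round trip, as in the patch.
func (hc HostConfig) DeepCopy() (*HostConfig, error) {
	b, err := json.Marshal(hc)
	if err != nil {
		return nil, err
	}
	var out HostConfig
	err = json.Unmarshal(b, &out)
	return &out, err
}

func main() {
	a := HostConfig{Name: "m0", Address: "192.168.0.10", Type: 1, Ports: []int{6443}}
	b, _ := a.DeepCopy()
	b.Ports[0] = 10250                  // mutating the copy...
	fmt.Println(a.Ports[0], b.Ports[0]) // ...leaves the original intact: 6443 10250
}
```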

New patch file (418 lines)

From 52f415c3888b9bd0484ba5f66b2e987e41d4f9fb Mon Sep 17 00:00:00 2001
From: haozi007 <liuhao27@huawei.com>
Date: Mon, 2 Aug 2021 07:24:24 +0100
Subject: [PATCH 8/9] refactor node mananger
1. add timeout of run task;
2. show tasks history on node;
Signed-off-by: haozi007 <liuhao27@huawei.com>
---
.../binary/addons/runner_addons.go | 3 +-
pkg/clusterdeployment/binary/binary.go | 7 +-
.../binary/cleanupcluster/cleanupetcd.go | 6 +-
.../cleanupcluster/cleanuploadbalance.go | 3 +-
.../binary/cleanupcluster/cleanupnode.go | 6 +-
.../binary/coredns/binarycoredns.go | 3 +-
.../binary/coredns/podcoredns.go | 3 +-
.../binary/etcdcluster/etcdreconfig.go | 3 +-
.../binary/infrastructure/infrastructure.go | 3 +-
.../binary/network/network.go | 3 +-
pkg/utils/nodemanager/node.go | 90 ++++++++++++++-----
pkg/utils/task/task.go | 10 +++
12 files changed, 93 insertions(+), 47 deletions(-)
diff --git a/pkg/clusterdeployment/binary/addons/runner_addons.go b/pkg/clusterdeployment/binary/addons/runner_addons.go
index f8c0dad..e5bc0ad 100644
--- a/pkg/clusterdeployment/binary/addons/runner_addons.go
+++ b/pkg/clusterdeployment/binary/addons/runner_addons.go
@@ -122,7 +122,7 @@ func cleanupAddons(cluster *api.ClusterConfig) error {
yamlPath := filepath.Join(cluster.PackageSrc.GetPkgDstPath(), constants.DefaultFilePath)
kubeconfig := filepath.Join(cluster.GetConfigDir(), constants.KubeConfigFileNameAdmin)
- t := task.NewTaskInstance(&CleanupAddonsTask{
+ t := task.NewTaskIgnoreErrInstance(&CleanupAddonsTask{
yaml: yaml,
srcPath: yamlPath,
kubeconfig: kubeconfig,
@@ -134,7 +134,6 @@ func cleanupAddons(cluster *api.ClusterConfig) error {
}
}
- task.SetIgnoreErrorFlag(t)
useMaster, err := nodemanager.RunTaskOnOneNode(t, masters)
if err != nil {
return err
diff --git a/pkg/clusterdeployment/binary/binary.go b/pkg/clusterdeployment/binary/binary.go
index dd260b0..f901643 100644
--- a/pkg/clusterdeployment/binary/binary.go
+++ b/pkg/clusterdeployment/binary/binary.go
@@ -102,12 +102,11 @@ func (bcp *BinaryClusterDeployment) registerNodes() error {
defer func() {
if err != nil {
bcp.Finish()
- nodemanager.UnRegisterAllNodes()
}
}()
for _, cfg := range bcp.config.Nodes {
- err := bcp.registerNode(cfg)
+ err = bcp.registerNode(cfg)
if err != nil {
return err
}
@@ -408,9 +407,7 @@ func (bcp *BinaryClusterDeployment) Finish() {
logrus.Info("do finish binary deployment...")
bcp.connLock.Lock()
defer bcp.connLock.Unlock()
- for _, c := range bcp.connections {
- c.Close()
- }
+ nodemanager.UnRegisterAllNodes()
bcp.connections = make(map[string]runner.Runner)
logrus.Info("do finish binary deployment success")
}
diff --git a/pkg/clusterdeployment/binary/cleanupcluster/cleanupetcd.go b/pkg/clusterdeployment/binary/cleanupcluster/cleanupetcd.go
index 81e1fe6..ed79246 100644
--- a/pkg/clusterdeployment/binary/cleanupcluster/cleanupetcd.go
+++ b/pkg/clusterdeployment/binary/cleanupcluster/cleanupetcd.go
@@ -84,13 +84,12 @@ func CleanupEtcdMember(conf *api.ClusterConfig, hostconfig *api.HostConfig) erro
}
// cleanup remains
- taskCleanupEtcdMember := task.NewTaskInstance(
+ taskCleanupEtcdMember := task.NewTaskIgnoreErrInstance(
&cleanupEtcdMemberTask{
ccfg: conf,
},
)
- task.SetIgnoreErrorFlag(taskCleanupEtcdMember)
if err := nodemanager.RunTaskOnNodes(taskCleanupEtcdMember, []string{hostconfig.Address}); err != nil {
return fmt.Errorf("run task for cleanup etcd member failed: %v", err)
}
@@ -114,14 +113,13 @@ func CleanupAllEtcds(conf *api.ClusterConfig) error {
}
// cleanup remains
- taskCleanupAllEtcds := task.NewTaskInstance(
+ taskCleanupAllEtcds := task.NewTaskIgnoreErrInstance(
&cleanupEtcdMemberTask{
ccfg: conf,
},
)
nodes := utils.GetAllIPs(conf.EtcdCluster.Nodes)
- task.SetIgnoreErrorFlag(taskCleanupAllEtcds)
if err := nodemanager.RunTaskOnNodes(taskCleanupAllEtcds, nodes); err != nil {
return fmt.Errorf("run task for cleanup all etcds failed: %v", err)
}
diff --git a/pkg/clusterdeployment/binary/cleanupcluster/cleanuploadbalance.go b/pkg/clusterdeployment/binary/cleanupcluster/cleanuploadbalance.go
index 010ef84..d836f99 100644
--- a/pkg/clusterdeployment/binary/cleanupcluster/cleanuploadbalance.go
+++ b/pkg/clusterdeployment/binary/cleanupcluster/cleanuploadbalance.go
@@ -56,13 +56,12 @@ func (t *cleanupLoadBalanceTask) Run(r runner.Runner, hostConfig *api.HostConfig
}
func CleanupLoadBalance(conf *api.ClusterConfig, lb *api.HostConfig) error {
- taskCleanupLoadBalance := task.NewTaskInstance(
+ taskCleanupLoadBalance := task.NewTaskIgnoreErrInstance(
&cleanupLoadBalanceTask{
ccfg: conf,
},
)
- task.SetIgnoreErrorFlag(taskCleanupLoadBalance)
if err := nodemanager.RunTaskOnNodes(taskCleanupLoadBalance, []string{lb.Address}); err != nil {
return fmt.Errorf("run task for cleanup loadbalance failed: %v", err)
}
diff --git a/pkg/clusterdeployment/binary/cleanupcluster/cleanupnode.go b/pkg/clusterdeployment/binary/cleanupcluster/cleanupnode.go
index da0488a..9d2d009 100644
--- a/pkg/clusterdeployment/binary/cleanupcluster/cleanupnode.go
+++ b/pkg/clusterdeployment/binary/cleanupcluster/cleanupnode.go
@@ -218,7 +218,7 @@ func (t *removeWorkerTask) Run(r runner.Runner, hostConfig *api.HostConfig) erro
}
func execRemoveWorkerTask(conf *api.ClusterConfig, hostconfig *api.HostConfig) error {
- taskRemoveWorker := task.NewTaskInstance(
+ taskRemoveWorker := task.NewTaskIgnoreErrInstance(
&removeWorkerTask{
ccfg: conf,
workerName: hostconfig.Name,
@@ -230,7 +230,6 @@ func execRemoveWorkerTask(conf *api.ClusterConfig, hostconfig *api.HostConfig) e
return fmt.Errorf("failed to get first master")
}
- task.SetIgnoreErrorFlag(taskRemoveWorker)
if err := nodemanager.RunTaskOnNodes(taskRemoveWorker, []string{master}); err != nil {
return err
}
@@ -249,14 +248,13 @@ func CleanupNode(conf *api.ClusterConfig, hostconfig *api.HostConfig, delType ui
}
}
- taskCleanupNode := task.NewTaskInstance(
+ taskCleanupNode := task.NewTaskIgnoreErrInstance(
&cleanupNodeTask{
ccfg: conf,
delType: delType,
},
)
- task.SetIgnoreErrorFlag(taskCleanupNode)
if err := nodemanager.RunTaskOnNodes(taskCleanupNode, []string{hostconfig.Address}); err != nil {
return fmt.Errorf("run task for cleanup cluster failed: %v", err)
}
diff --git a/pkg/clusterdeployment/binary/coredns/binarycoredns.go b/pkg/clusterdeployment/binary/coredns/binarycoredns.go
index db9bcd5..411dba2 100644
--- a/pkg/clusterdeployment/binary/coredns/binarycoredns.go
+++ b/pkg/clusterdeployment/binary/coredns/binarycoredns.go
@@ -362,14 +362,13 @@ func (bc *BinaryCoredns) Cleanup(cluster *api.ClusterConfig) error {
logrus.Warn("no master host found, can not cleanup coredns service")
return nil
}
- sst := task.NewTaskInstance(
+ sst := task.NewTaskIgnoreErrInstance(
&BinaryCorednsCleanupTask{
Cluster: cluster,
cleanYaml: true,
},
)
- task.SetIgnoreErrorFlag(sst)
err := nodemanager.RunTaskOnNodes(sst, masterIPs)
if err != nil {
logrus.Warnf("run cleanup coredns task failed: %v", err)
diff --git a/pkg/clusterdeployment/binary/coredns/podcoredns.go b/pkg/clusterdeployment/binary/coredns/podcoredns.go
index c27f1d1..28ae908 100644
--- a/pkg/clusterdeployment/binary/coredns/podcoredns.go
+++ b/pkg/clusterdeployment/binary/coredns/podcoredns.go
@@ -150,7 +150,7 @@ func (pc *PodCoredns) Cleanup(cluster *api.ClusterConfig) error {
if cluster == nil {
return fmt.Errorf("invalid cluster config")
}
- t := task.NewTaskInstance(&PodCorednsCleanupTask{Cluster: cluster})
+ t := task.NewTaskIgnoreErrInstance(&PodCorednsCleanupTask{Cluster: cluster})
var masters []string
for _, n := range cluster.Nodes {
if (n.Type & api.Master) != 0 {
@@ -158,7 +158,6 @@ func (pc *PodCoredns) Cleanup(cluster *api.ClusterConfig) error {
}
}
- task.SetIgnoreErrorFlag(t)
useMaster, err := nodemanager.RunTaskOnOneNode(t, masters)
if err != nil {
return err
diff --git a/pkg/clusterdeployment/binary/etcdcluster/etcdreconfig.go b/pkg/clusterdeployment/binary/etcdcluster/etcdreconfig.go
index b417861..aadc4a8 100644
--- a/pkg/clusterdeployment/binary/etcdcluster/etcdreconfig.go
+++ b/pkg/clusterdeployment/binary/etcdcluster/etcdreconfig.go
@@ -196,13 +196,12 @@ func (t *removeEtcdsTask) Run(r runner.Runner, hostConfig *api.HostConfig) error
}
func execRemoveEtcdsTask(conf *api.ClusterConfig, node string) error {
- taskRemoveEtcds := task.NewTaskInstance(
+ taskRemoveEtcds := task.NewTaskIgnoreErrInstance(
&removeEtcdsTask{
ccfg: conf,
},
)
- task.SetIgnoreErrorFlag(taskRemoveEtcds)
if err := nodemanager.RunTaskOnNodes(taskRemoveEtcds, []string{node}); err != nil {
logrus.Errorf("run task for remove etcds failed: %v", err)
return err
diff --git a/pkg/clusterdeployment/binary/infrastructure/infrastructure.go b/pkg/clusterdeployment/binary/infrastructure/infrastructure.go
index bf6ed3a..407c4a8 100644
--- a/pkg/clusterdeployment/binary/infrastructure/infrastructure.go
+++ b/pkg/clusterdeployment/binary/infrastructure/infrastructure.go
@@ -419,13 +419,12 @@ func NodeInfrastructureDestroy(config *api.ClusterConfig, hostconfig *api.HostCo
return fmt.Errorf("do not register %d roleinfra", hostconfig.Type)
}
- itask := task.NewTaskInstance(
+ itask := task.NewTaskIgnoreErrInstance(
&DestroyInfraTask{
packageSrc: &config.PackageSrc,
roleInfra: roleInfra,
})
- task.SetIgnoreErrorFlag(itask)
if err := nodemanager.RunTaskOnNodes(itask, []string{hostconfig.Address}); err != nil {
return fmt.Errorf("destroy infrastructure Task failed: %v", err)
}
diff --git a/pkg/clusterdeployment/binary/network/network.go b/pkg/clusterdeployment/binary/network/network.go
index e1cd73f..0c35a9c 100644
--- a/pkg/clusterdeployment/binary/network/network.go
+++ b/pkg/clusterdeployment/binary/network/network.go
@@ -121,7 +121,7 @@ func CleanupNetwork(cluster *api.ClusterConfig) error {
if cluster == nil {
return fmt.Errorf("invalid cluster config")
}
- t := task.NewTaskInstance(&CleanupNetworkTask{Cluster: cluster})
+ t := task.NewTaskIgnoreErrInstance(&CleanupNetworkTask{Cluster: cluster})
var masters []string
for _, n := range cluster.Nodes {
if (n.Type & api.Master) != 0 {
@@ -129,7 +129,6 @@ func CleanupNetwork(cluster *api.ClusterConfig) error {
}
}
- task.SetIgnoreErrorFlag(t)
useMaster, err := nodemanager.RunTaskOnOneNode(t, masters)
if err != nil {
return err
diff --git a/pkg/utils/nodemanager/node.go b/pkg/utils/nodemanager/node.go
index a19d2e7..08af098 100644
--- a/pkg/utils/nodemanager/node.go
+++ b/pkg/utils/nodemanager/node.go
@@ -17,6 +17,7 @@ package nodemanager
import (
"fmt"
+ "strings"
"sync"
"time"
@@ -54,6 +55,12 @@ func (ns NodeStatus) ShowCounts() string {
return fmt.Sprintf("{ total: %d, success: %d, fail: %d, ignore: %d }", ns.TaskTotalCnt, ns.TaskSuccessCnt, ns.TaskFailCnt, ns.TaskIgnoreCnt)
}
+type taskSummary struct {
+ name string
+ useTime time.Duration
+ status string
+}
+
type Node struct {
host *api.HostConfig
r runner.Runner
@@ -62,6 +69,32 @@ type Node struct {
queue chan task.Task
lock sync.RWMutex
status NodeStatus
+
+ tasksHistory []taskSummary
+}
+
+func (n *Node) addHistory(t task.Task, err error, useTime time.Duration) {
+ ts := taskSummary{name: t.Name(), useTime: useTime}
+ if err == nil {
+ ts.status = "success"
+ } else {
+ if task.IsIgnoreError(t) {
+ ts.status = fmt.Sprintf("ignore err: %v", err)
+ } else {
+ ts.status = err.Error()
+ }
+ }
+ n.tasksHistory = append(n.tasksHistory, ts)
+}
+
+func (n *Node) ShowTaskList() string {
+ var sb strings.Builder
+ sb.WriteString(fmt.Sprintf("\n##################tasks on node: %s#################\n", n.host.Name))
+ for _, n := range n.tasksHistory {
+ sb.WriteString(fmt.Sprintf("name: %s, elapsed time: %s, message: %s\n", n.name, n.useTime.String(), n.status))
+ }
+ sb.WriteString("#########################################\n")
+ return sb.String()
}
func (n *Node) GetStatus() NodeStatus {
@@ -135,7 +168,42 @@ func (n *Node) PushTask(t task.Task) bool {
func (n *Node) Finish() {
n.stop <- true
n.r.Close()
- logrus.Infof("node: %s is finished", n.host.Address)
+ logrus.Infof(n.ShowTaskList())
+}
+
+func doRunTask(n *Node, t task.Task) {
+ start := time.Now()
+ echan := make(chan error)
+ go func(ec chan error) {
+ select {
+ // TODO: maybe we need get timeout from task
+ case <-time.After(time.Second * 300):
+ ec <- fmt.Errorf("timeout to run task")
+ case ec <- t.Run(n.r, n.host):
+ }
+ }(echan)
+
+ err := <-echan
+ finish := time.Now()
+
+ if err != nil {
+ label := fmt.Sprintf("%s: run task: %s on node: %s fail: %v", task.FAILED, t.Name(), n.host.Address, err)
+ t.AddLabel(n.host.Address, label)
+ if task.IsIgnoreError(t) {
+ logrus.Warnf("ignore: %s", label)
+ n.updateNodeStatus("", IgnoreStatus)
+ } else {
+ logrus.Errorf("%s", label)
+ // set task status on node after task
+ n.updateNodeStatus(label, ErrorStatus)
+ }
+ } else {
+ t.AddLabel(n.host.Address, task.SUCCESS)
+ // set task status on node after task
+ n.updateNodeStatus("", FinishStatus)
+ logrus.Infof("run task: %s success on %s\n", t.Name(), n.host.Address)
+ }
+ n.addHistory(t, err, finish.UTC().Sub(start))
}
func NewNode(hcf *api.HostConfig, r runner.Runner) (*Node, error) {
@@ -153,25 +221,7 @@ func NewNode(hcf *api.HostConfig, r runner.Runner) (*Node, error) {
case <-n.stop:
return
case t := <-n.queue:
- // set task status on node before run task
- err := t.Run(n.r, n.host)
- if err != nil {
- label := fmt.Sprintf("%s: run task: %s on node: %s fail: %v", task.FAILED, t.Name(), n.host.Address, err)
- t.AddLabel(n.host.Address, label)
- if task.IsIgnoreError(t) {
- logrus.Warnf("ignore: %s", label)
- n.updateNodeStatus("", IgnoreStatus)
- } else {
- logrus.Errorf("%s", label)
- // set task status on node after task
- n.updateNodeStatus(label, ErrorStatus)
- }
- } else {
- t.AddLabel(n.host.Address, task.SUCCESS)
- // set task status on node after task
- n.updateNodeStatus("", FinishStatus)
- logrus.Infof("run task: %s success on %s\n", t.Name(), n.host.Address)
- }
+ doRunTask(n, t)
}
}
}(n)
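
The worker loop above now delegates to doRunTask, which races each task against a five-minute timer. For reference, a self-contained sketch of the standard Go idiom for bounding a function call with a timeout follows; runWithTimeout and the inline task are illustrative names, not part of eggo:

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

// runWithTimeout executes fn and returns its error, or a timeout error
// if fn does not finish within d. The buffered channel lets the
// goroutine complete its send and exit even after the timeout fires.
func runWithTimeout(fn func() error, d time.Duration) error {
	done := make(chan error, 1)
	go func() { done <- fn() }()
	select {
	case err := <-done:
		return err
	case <-time.After(d):
		return errors.New("timeout to run task")
	}
}

func main() {
	err := runWithTimeout(func() error {
		time.Sleep(50 * time.Millisecond) // stand-in for task.Run
		return nil
	}, 300*time.Second)
	fmt.Println(err) // <nil>
}
```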
diff --git a/pkg/utils/task/task.go b/pkg/utils/task/task.go
index f896d83..8452b01 100644
--- a/pkg/utils/task/task.go
+++ b/pkg/utils/task/task.go
@@ -53,6 +53,16 @@ func NewTaskInstance(t TaskRun) *TaskInstance {
}
}
+func NewTaskIgnoreErrInstance(t TaskRun) *TaskInstance {
+ ti := &TaskInstance{
+ data: make(map[string]string),
+ TaskRun: t,
+ }
+
+ ti.AddLabel(IgnoreErr, "true")
+ return ti
+}
+
func (t *TaskInstance) AddLabel(key, label string) {
t.l.Lock()
defer t.l.Unlock()
--
2.25.1
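
With NewTaskIgnoreErrInstance in place, call sites no longer need the separate task.SetIgnoreErrorFlag step removed throughout this patch. A minimal sketch of a task built with the new constructor; the checkDiskTask type is hypothetical, and the import paths are inferred from the repository layout:

```go
package example

import (
	"gitee.com/openeuler/eggo/pkg/api"
	"gitee.com/openeuler/eggo/pkg/utils/nodemanager"
	"gitee.com/openeuler/eggo/pkg/utils/runner"
	"gitee.com/openeuler/eggo/pkg/utils/task"
)

// checkDiskTask is a hypothetical cleanup-style task used only to
// illustrate the constructor: a failure on one node should not abort
// the whole operation, so it is created as an ignore-error task.
type checkDiskTask struct{}

func (t *checkDiskTask) Name() string { return "check-disk" }

func (t *checkDiskTask) Run(r runner.Runner, hcf *api.HostConfig) error {
	_, err := r.RunCommand("df -h /var/lib")
	return err
}

func checkDisks(nodes []string) error {
	// One constructor replaces the old NewTaskInstance +
	// task.SetIgnoreErrorFlag pair.
	t := task.NewTaskIgnoreErrInstance(&checkDiskTask{})
	return nodemanager.RunTaskOnNodes(t, nodes)
}
```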

0009-modify-manual.patch Normal file
@@ -0,0 +1,69 @@
From 79aa934ed0ca0f7e2b6b40e88388872ad9e07680 Mon Sep 17 00:00:00 2001
From: zhangxiaoyu <zhangxiaoyu58@huawei.com>
Date: Wed, 28 Jul 2021 17:06:11 +0800
Subject: [PATCH 9/9] modify manual
Signed-off-by: zhangxiaoyu <zhangxiaoyu58@huawei.com>
---
config/centos.config | 2 +-
docs/manual.md | 9 +++++----
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/config/centos.config b/config/centos.config
index 71a91bc..2cdf465 100755
--- a/config/centos.config
+++ b/config/centos.config
@@ -19,7 +19,7 @@ install:
package-source:
type: tar.gz
dstpath: ""
- armsrc: /root/packages/pacakges-arm.tar.gz
+ armsrc: /root/packages/packages-arm.tar.gz
x86src: /root/packages/packages-x86.tar.gz
etcd:
- name: etcd,etcdctl
diff --git a/docs/manual.md b/docs/manual.md
index 0e86158..503b034 100644
--- a/docs/manual.md
+++ b/docs/manual.md
@@ -106,6 +106,7 @@ $ eggo -d deploy -f deploy.yaml
```
$ eggo -d join --id k8s-cluster --type master,worker --arch arm64 --port 22 192.168.0.5
+```
* -d: print debug information
* --id: the id of the cluster
@@ -155,12 +156,12 @@ cluster-id: k8s-cluster // cluster name
username: root // ssh login username for the machines hosting the k8s cluster; all machines must use the same username
password: 123456 // ssh login password for the machines hosting the k8s cluster; all machines must use the same password
masters: // list of master nodes; it is recommended that every master node also serve as a worker node, otherwise master nodes may be unable to reach pods directly
-- name: test0 // the node's name; it is set as the node's hostname and as the name the k8s cluster uses for this node
+- name: test0 // the node's name as seen by the k8s cluster
ip: 192.168.0.1 // the node's ip address
port: 22 // ssh login port
arch: arm64 // machine architecture; use amd64 for x86_64
workers: // list of worker nodes
-- name: test0 // the node's name; it is set as the node's hostname and as the name the k8s cluster uses for this node
+- name: test0 // the node's name as seen by the k8s cluster
ip: 192.168.0.2 // the node's ip address
port: 22 // ssh login port
arch: arm64 // machine architecture; use amd64 for x86_64
@@ -169,12 +170,12 @@ workers: // list of worker nodes
port: 22
arch: arm64
etcds: // list of etcd nodes; if empty, an etcd instance is deployed on every master node, otherwise only the configured etcd nodes are deployed
-- name: etcd-0 // the node's name; it is set as the node's hostname and as the name the k8s cluster uses for this node
+- name: etcd-0 // the node's name as seen by the k8s cluster
ip: 192.168.0.4 // the node's ip address
port: 22 // ssh login port
arch: amd64 // machine architecture; use amd64 for x86_64
loadbalance: // the loadbalance node
- name: k8s-loadbalance // the node's name; it is set as the node's hostname and as the name the k8s cluster uses for this node
+ name: k8s-loadbalance // the node's name as seen by the k8s cluster
ip: 192.168.0.5 // the node's ip address
port: 22 // ssh login port
arch: amd64 // machine architecture; use amd64 for x86_64
--
2.25.1

eggo.spec
@@ -1,14 +1,21 @@
-%global _version 0.9.1
-%global _release 20210720.203516.git8e9b816d
Name: eggo
-Version: %{_version}
-Release: %{_release}
+Version: 0.9.1
+Release: 1
Summary: Eggo is a tool built to provide standard multi-ways for creating Kubernetes clusters.
License: Mulan PSL V2
URL: https://gitee.com/openeuler/eggo
Source0: https://gitee.com/openeuler/eggo/repository/archive/v%{version}.tar.gz
Patch0001: 0001-update-docs-of-eggo.patch
+Patch0002: 0002-fix-some-bugs.patch
+Patch0003: 0003-use-export-to-replace-go-env.patch
+Patch0004: 0004-add-hooks-apis-for-cluster.patch
+Patch0005: 0005-add-ip-name-for-hosts.patch
+Patch0006: 0006-remove-unuseful-code.patch
+Patch0007: 0007-support-rollback-of-create-cluster.patch
+Patch0008: 0008-refactor-node-mananger.patch
+Patch0009: 0009-modify-manual.patch
BuildRequires: make
BuildRequires: git
BuildRequires: golang >= 1.13
@@ -46,6 +53,12 @@ rm -rf src
%attr(551,root,root) %{_bindir}/eggo
%changelog
+* Mon Aug 02 2021 zhangxiaoyu<zhangxiaoyu58@huawei.com> - 0.9.1-1
+- Type:upgrade
+- CVE:NA
+- SUG:NA
+- DESC:upgrade to v0.9.1-1
* Tue Jul 20 2021 zhangxiaoyu<zhangxiaoyu58@huawei.com> - 0.9.1-20210712.150722.gitddf3d38e
- Type:upgrade
- CVE:NA