From ab6783942ef1a76fd77b77ac2de596c702ba4607 Mon Sep 17 00:00:00 2001 From: haozi007 Date: Tue, 27 Jul 2021 08:30:51 +0100 Subject: [PATCH 4/9] add hooks apis for cluster Signed-off-by: haozi007 --- config/openEuler.config | 4 +- pkg/api/types.go | 10 ++ pkg/clusterdeployment/binary/binary.go | 149 ++++++++++++++++++ .../binary/commontools/runshell.go | 39 +++++ pkg/clusterdeployment/clusterdeploy.go | 48 +++--- pkg/utils/nodemanager/node.go | 8 +- pkg/utils/nodemanager/nodemanager.go | 19 +-- pkg/utils/utils.go | 2 +- pkg/utils/utils_test.go | 121 ++++++++++++++ 9 files changed, 353 insertions(+), 47 deletions(-) create mode 100644 pkg/clusterdeployment/binary/commontools/runshell.go create mode 100644 pkg/utils/utils_test.go diff --git a/config/openEuler.config b/config/openEuler.config index eac76ff..6ced22f 100755 --- a/config/openEuler.config +++ b/config/openEuler.config @@ -21,8 +21,8 @@ install: package-source: type: tar.gz dstpath: "" - armsrc: /root/pacakges/pacakges-arm.tar.gz - x86src: /root/pacakges/packages-x86.tar.gz + armsrc: /root/packages/packages-arm.tar.gz + x86src: /root/packages/packages-x86.tar.gz container: - name: iSulad type: repo diff --git a/pkg/api/types.go b/pkg/api/types.go index 8097779..403dde3 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -232,6 +232,16 @@ type EtcdAPI interface { type ClusterManagerAPI interface { // TODO: should add other dependence cluster configurations + PreCreateClusterHooks() error + PostCreateClusterHooks() error + PreDeleteClusterHooks() + PostDeleteClusterHooks() + + PreNodeJoinHooks(node *HostConfig) error + PostNodeJoinHooks(node *HostConfig) error + PreNodeCleanupHooks(node *HostConfig) + PostNodeCleanupHooks(node *HostConfig) + ClusterControlPlaneInit(node *HostConfig) error ClusterNodeJoin(node *HostConfig) error ClusterNodeCleanup(node *HostConfig, delType uint16) error diff --git a/pkg/clusterdeployment/binary/binary.go b/pkg/clusterdeployment/binary/binary.go index 40373e3..96c0dd1 
100644 --- a/pkg/clusterdeployment/binary/binary.go +++ b/pkg/clusterdeployment/binary/binary.go @@ -22,6 +22,7 @@ import ( "isula.org/eggo/pkg/clusterdeployment/binary/addons" "isula.org/eggo/pkg/clusterdeployment/binary/bootstrap" "isula.org/eggo/pkg/clusterdeployment/binary/cleanupcluster" + "isula.org/eggo/pkg/clusterdeployment/binary/commontools" "isula.org/eggo/pkg/clusterdeployment/binary/controlplane" "isula.org/eggo/pkg/clusterdeployment/binary/coredns" "isula.org/eggo/pkg/clusterdeployment/binary/etcdcluster" @@ -29,9 +30,11 @@ import ( "isula.org/eggo/pkg/clusterdeployment/binary/loadbalance" "isula.org/eggo/pkg/clusterdeployment/manager" "isula.org/eggo/pkg/utils" + "isula.org/eggo/pkg/utils/dependency" "isula.org/eggo/pkg/utils/kubectl" "isula.org/eggo/pkg/utils/nodemanager" "isula.org/eggo/pkg/utils/runner" + "isula.org/eggo/pkg/utils/task" "github.com/sirupsen/logrus" ) @@ -415,3 +418,149 @@ func (bcp *BinaryClusterDeployment) Finish() { bcp.connections = make(map[string]runner.Runner) logrus.Info("do finish binary deployment success") } + +func (bcp *BinaryClusterDeployment) PreCreateClusterHooks() error { + role := []uint16{api.LoadBalance, api.ETCD, api.Master, api.Worker} + if err := dependency.HookSchedule(bcp.config, bcp.config.Nodes, role, api.SchedulePreJoin); err != nil { + return err + } + return nil +} + +func (bcp *BinaryClusterDeployment) PostCreateClusterHooks() error { + role := []uint16{api.LoadBalance, api.ETCD, api.Master, api.Worker} + if err := dependency.HookSchedule(bcp.config, bcp.config.Nodes, role, api.SchedulePostJoin); err != nil { + return err + } + + if err := checkK8sServices(bcp.config.Nodes); err != nil { + return err + } + return nil +} + +func (bcp *BinaryClusterDeployment) PreDeleteClusterHooks() { + role := []uint16{api.Worker, api.Master, api.ETCD, api.LoadBalance} + if err := dependency.HookSchedule(bcp.config, bcp.config.Nodes, role, api.SchedulePreCleanup); err != nil { + logrus.Warnf("Ignore: Delete cluster 
PreHook failed: %v", err) + } +} + +func (bcp *BinaryClusterDeployment) PostDeleteClusterHooks() { + role := []uint16{api.Worker, api.Master, api.ETCD, api.LoadBalance} + if err := dependency.HookSchedule(bcp.config, bcp.config.Nodes, role, api.SchedulePostCleanup); err != nil { + logrus.Warnf("Ignore: Delete cluster PostHook failed: %v", err) + } +} + +func (bcp *BinaryClusterDeployment) PreNodeJoinHooks(node *api.HostConfig) error { + role := []uint16{api.Master, api.Worker, api.ETCD} + if err := dependency.HookSchedule(bcp.config, []*api.HostConfig{node}, role, api.SchedulePreJoin); err != nil { + return err + } + return nil +} + +func checkWorkerServices(workers []string) error { + if len(workers) == 0 { + return nil + } + shell := `#!/bin/bash +systemctl status kubelet | tail -20 +[[ ${PIPESTATUS[0]} -ne 0 ]] && exit 1 +systemctl status kube-proxy | tail -20 +[[ ${PIPESTATUS[0]} -ne 0 ]] && exit 1 +exit 0 +` + checker := task.NewTaskInstance( + &commontools.RunShellTask{ + ShellName: "checkWorker", + Shell: shell, + }, + ) + + return nodemanager.RunTaskOnNodes(checker, workers) +} + +func checkMasterServices(masters []string) error { + if len(masters) == 0 { + return nil + } + shell := `#!/bin/bash +systemctl status kube-apiserver | tail -20 +[[ ${PIPESTATUS[0]} -ne 0 ]] && exit 1 +systemctl status kube-controller-manager | tail -20 +[[ ${PIPESTATUS[0]} -ne 0 ]] && exit 1 +systemctl status kube-scheduler | tail -20 +[[ ${PIPESTATUS[0]}
-ne 0 ]] && exit 1 +exit 0 +` + checker := task.NewTaskInstance( + &commontools.RunShellTask{ + ShellName: "checkMaster", + Shell: shell, + }, + ) + + return nodemanager.RunTaskOnNodes(checker, masters) +} + +func checkK8sServices(nodes []*api.HostConfig) error { + var workers, masters []string + + for _, n := range nodes { + if utils.IsType(n.Type, api.Master) { + masters = append(masters, n.Address) + } + if utils.IsType(n.Type, api.Worker) { + workers = append(workers, n.Address) + } + } + if err := checkWorkerServices(workers); err != nil { + return err + } + return checkMasterServices(masters) +} + +func (bcp *BinaryClusterDeployment) PostNodeJoinHooks(node *api.HostConfig) error { + role := []uint16{api.Master, api.Worker, api.ETCD} + if err := dependency.HookSchedule(bcp.config, []*api.HostConfig{node}, role, api.SchedulePostJoin); err != nil { + return err + } + + // taint and label the node only when it holds both the master and worker roles + roles := node.Type + for _, n := range bcp.config.Nodes { + if n.Name == node.Name { + roles |= n.Type + break + } + } + + if utils.IsType(roles, (api.Master | api.Worker)) { + if err := taintAndLabelNode(bcp.config.Name, node.Name); err != nil { + return err + } + } + + // check node status + if err := checkK8sServices([]*api.HostConfig{node}); err != nil { + return err + } + + return nil +} + +func (bcp *BinaryClusterDeployment) PreNodeCleanupHooks(node *api.HostConfig) { + role := []uint16{api.Worker, api.Master, api.ETCD} + if err := dependency.HookSchedule(bcp.config, []*api.HostConfig{node}, role, api.SchedulePreCleanup); err != nil { + logrus.Warnf("Ignore: Delete Node PreHook failed: %v", err) + } +} + +func (bcp *BinaryClusterDeployment) PostNodeCleanupHooks(node *api.HostConfig) { + role := []uint16{api.Worker, api.Master, api.ETCD} + if err := dependency.HookSchedule(bcp.config, []*api.HostConfig{node}, role, api.SchedulePostCleanup); err != nil { + logrus.Warnf("Ignore: Delete Node PostHook failed: %v", err) + } +} diff --git
a/pkg/clusterdeployment/binary/commontools/runshell.go b/pkg/clusterdeployment/binary/commontools/runshell.go new file mode 100644 index 0000000..266c488 --- /dev/null +++ b/pkg/clusterdeployment/binary/commontools/runshell.go @@ -0,0 +1,39 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. + * eggo licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: haozi007 + * Create: 2021-07-27 + * Description: util for run shell + ******************************************************************************/ +package commontools + +import ( + "github.com/sirupsen/logrus" + "isula.org/eggo/pkg/api" + "isula.org/eggo/pkg/utils/runner" +) + +type RunShellTask struct { + ShellName string + Shell string +} + +func (ct *RunShellTask) Name() string { + return "RunShellTask" +} + +func (ct *RunShellTask) Run(r runner.Runner, hcf *api.HostConfig) error { + out, err := r.RunShell(ct.Shell, ct.ShellName) + if err != nil { + return err + } + logrus.Debugf("run shell: %s, get out:\n%s", ct.ShellName, out) + return nil +} diff --git a/pkg/clusterdeployment/clusterdeploy.go b/pkg/clusterdeployment/clusterdeploy.go index 353e877..e5f69f0 100644 --- a/pkg/clusterdeployment/clusterdeploy.go +++ b/pkg/clusterdeployment/clusterdeploy.go @@ -25,7 +25,6 @@ import ( _ "isula.org/eggo/pkg/clusterdeployment/binary" "isula.org/eggo/pkg/clusterdeployment/manager" "isula.org/eggo/pkg/utils" - "isula.org/eggo/pkg/utils/dependency" "isula.org/eggo/pkg/utils/nodemanager" ) @@ -61,9 
+60,8 @@ func doCreateCluster(handler api.ClusterDeploymentAPI, cc *api.ClusterConfig) er } } - // Step2: Hook SchedulePreJoin - role := []uint16{api.LoadBalance, api.ETCD, api.Master, api.Worker} - if err := dependency.HookSchedule(cc, cc.Nodes, role, api.SchedulePreJoin); err != nil { + // Step2: run precreate cluster hooks + if err := handler.PreCreateClusterHooks(); err != nil { return err } @@ -94,20 +92,21 @@ func doCreateCluster(handler api.ClusterDeploymentAPI, cc *api.ClusterConfig) er return err } } - //Step7: setup addons for cluster // wait all nodes ready if err := nodemanager.WaitNodesFinishWithProgress(joinNodeIDs, time.Minute*5); err != nil { return err } + //Step7: setup addons for cluster if err := handler.AddonsSetup(); err != nil { return err } - // Step8: Hook SchedulePostJoin - if err := dependency.HookSchedule(cc, cc.Nodes, role, api.SchedulePostJoin); err != nil { + // Step8: run postcreate cluster hooks + if err := handler.PostCreateClusterHooks(); err != nil { return err } + allNodes := utils.GetAllIPs(cc.Nodes) if err := nodemanager.WaitNodesFinishWithProgress(allNodes, time.Minute*5); err != nil { return err @@ -151,9 +150,8 @@ func doJoinNode(handler api.ClusterDeploymentAPI, cc *api.ClusterConfig, hostcon return err } - // Hook SchedulePreJoin - role := []uint16{api.Master, api.Worker, api.ETCD} - if err := dependency.HookSchedule(cc, []*api.HostConfig{hostconfig}, role, api.SchedulePreJoin); err != nil { + // Pre node join Hooks + if err := handler.PreNodeJoinHooks(hostconfig); err != nil { return err } @@ -167,8 +165,8 @@ func doJoinNode(handler api.ClusterDeploymentAPI, cc *api.ClusterConfig, hostcon return err } - // Hook SchedulePostJoin - if err := dependency.HookSchedule(cc, []*api.HostConfig{hostconfig}, role, api.SchedulePostJoin); err != nil { + // Post node join Hooks + if err := handler.PostNodeJoinHooks(hostconfig); err != nil { return err } @@ -223,11 +221,8 @@ func JoinNode(cc *api.ClusterConfig, hostconfig 
*api.HostConfig) error { } func doDeleteNode(handler api.ClusterDeploymentAPI, cc *api.ClusterConfig, h *api.HostConfig) error { - // Hook SchedulePreCleanup - role := []uint16{api.Worker, api.Master, api.ETCD} - if err := dependency.HookSchedule(cc, []*api.HostConfig{h}, role, api.SchedulePreCleanup); err != nil { - return err - } + // Pre node delete Hooks + handler.PreNodeCleanupHooks(h) if utils.IsType(h.Type, api.Worker) { if err := handler.ClusterNodeCleanup(h, api.Worker); err != nil { @@ -247,10 +242,8 @@ func doDeleteNode(handler api.ClusterDeploymentAPI, cc *api.ClusterConfig, h *ap } } - // Hook SchedulePostCleanup - if err := dependency.HookSchedule(cc, []*api.HostConfig{h}, role, api.SchedulePostCleanup); err != nil { - return err - } + // Post node delete Hooks + handler.PostNodeCleanupHooks(h) if err := handler.MachineInfraDestroy(h); err != nil { logrus.Warnf("cleanup infrastructure for node: %s failed: %v", h.Name, err) @@ -290,11 +283,8 @@ func DeleteNode(cc *api.ClusterConfig, hostconfig *api.HostConfig) error { } func doRemoveCluster(handler api.ClusterDeploymentAPI, cc *api.ClusterConfig) { - // Step1: Hook SchedulePreCleanup - role := []uint16{api.Worker, api.Master, api.ETCD, api.LoadBalance} - if err := dependency.HookSchedule(cc, cc.Nodes, role, api.SchedulePreCleanup); err != nil { - logrus.Errorf("Hook SchedulePreCleanup failed: %v", err) - } + // Step1: Pre delete cluster Hooks + handler.PreDeleteClusterHooks() // Step2: cleanup addons err := handler.AddonsDestroy() @@ -344,10 +334,8 @@ func doRemoveCluster(handler api.ClusterDeploymentAPI, cc *api.ClusterConfig) { logrus.Warnf("[cluster] cleanup etcd cluster failed: %v", err) } - // Step7: Hook SchedulePostCleanup - if err := dependency.HookSchedule(cc, cc.Nodes, role, api.SchedulePostCleanup); err != nil { - logrus.Errorf("Hook SchedulePostCleanup failed: %v", err) - } + // Step7: Post delete cluster Hooks + handler.PostDeleteClusterHooks() // Step8: cleanup infrastructure for _, n := 
range cc.Nodes { diff --git a/pkg/utils/nodemanager/node.go b/pkg/utils/nodemanager/node.go index b1235c7..d610e3b 100644 --- a/pkg/utils/nodemanager/node.go +++ b/pkg/utils/nodemanager/node.go @@ -115,13 +115,15 @@ func (n *Node) updateNodeStatus(message string, status int) { } } -func (n *Node) PushTask(task task.Task) bool { - if n.status.HasError() { +func (n *Node) PushTask(t task.Task) bool { + // once the node has failed, reject everything except ignore-error (cleanup) tasks + if n.status.HasError() && !task.IsIgnoreError(t) { logrus.Debugf("node finished with error: %v", n.status.Message) return false } + select { - case n.queue <- task: + case n.queue <- t: n.updateTotalCnt() return true default: diff --git a/pkg/utils/nodemanager/nodemanager.go b/pkg/utils/nodemanager/nodemanager.go index fc3f1ee..b135890 100644 --- a/pkg/utils/nodemanager/nodemanager.go +++ b/pkg/utils/nodemanager/nodemanager.go @@ -92,8 +92,8 @@ func doRetryPushTask(t task.Task, retryNodes []*Node) error { } func RunTaskOnNodes(t task.Task, nodes []string) error { - manager.lock.RLock() - defer manager.lock.RUnlock() + manager.lock.Lock() + defer manager.lock.Unlock() var retryNodes []*Node for _, id := range nodes { if n, ok := manager.nodes[id]; ok { @@ -112,8 +112,8 @@ func RunTaskOnNodes(t task.Task, nodes []string) error { func RunTaskOnAll(t task.Task) error { var retryNodes []*Node - manager.lock.RLock() - defer manager.lock.RUnlock() + manager.lock.Lock() + defer manager.lock.Unlock() for id, n := range manager.nodes { if n.PushTask(t) { continue @@ -126,8 +126,8 @@ func RunTaskOnAll(t task.Task) error { } func RunTasksOnNode(tasks []task.Task, node string) error { - manager.lock.RLock() - defer manager.lock.RUnlock() + manager.lock.Lock() + defer manager.lock.Unlock() for _, t := range tasks { if n, ok := manager.nodes[node]; ok { @@ -152,9 +152,6 @@ func RunTasksOnNode(tasks []task.Task, node string) error { } func RunTasksOnNodes(tasks []task.Task, nodes []string) error { - manager.lock.RLock() - defer
manager.lock.RUnlock() - for _, n := range nodes { if err := RunTasksOnNode(tasks, n); err != nil { logrus.Errorf("run tasks on node %s failed: %v", n, err) @@ -166,8 +163,8 @@ func RunTasksOnNodes(tasks []task.Task, nodes []string) error { } func RunTaskOnOneNode(t task.Task, nodes []string) (string, error) { - manager.lock.RLock() - defer manager.lock.RUnlock() + manager.lock.Lock() + defer manager.lock.Unlock() for _, id := range nodes { n, ok := manager.nodes[id] diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index 86be713..72f8777 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -51,7 +51,7 @@ func GetEggoDir() string { } func IsType(curType uint16, expectedType uint16) bool { - return curType&expectedType != 0 + return (curType & expectedType) == expectedType } func AddSudo(cmd string) string { diff --git a/pkg/utils/utils_test.go b/pkg/utils/utils_test.go new file mode 100644 index 0000000..df3743f --- /dev/null +++ b/pkg/utils/utils_test.go @@ -0,0 +1,121 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. + * eggo licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Author: zhangxiaoyu + * Create: 2021-05-19 + * Description: eggo utils ut + ******************************************************************************/ + +package utils + +import ( + "sort" + "testing" +) + +func TestIsType(t *testing.T) { + cs := []struct { + name string + val1 uint16 + val2 uint16 + expect bool + }{ + { + "test1", + 1, + 1, + true, + }, + { + "test2", + 7, + 7, + true, + }, + { + "test3", + 123, + 123, + true, + }, + { + "test11", + 8, + 1, + false, + }, + { + "test12", + 0, + 1, + false, + }, + { + "test13", + 120, + 121, + false, + }, + } + + for _, c := range cs { + if IsType(c.val1, c.val2) != c.expect { + t.Errorf("case: %s, expect: %v, get: %v", c.name, c.expect, IsType(c.val1, c.val2)) + } + } +} + +func TestRemoveDupString(t *testing.T) { + cs := []struct { + name string + val []string + expect []string + }{ + { + "test1", + []string{"abc", "bcd"}, + []string{"abc", "bcd"}, + }, + { + "test2", + []string{"abc", "bcd", "abc"}, + []string{"abc", "bcd"}, + }, + { + "test3", + []string{"xxx", "bcd"}, + []string{"bcd", "xxx"}, + }, + { + "test4", + []string{"xxx", "xxx", "xxx"}, + []string{"xxx"}, + }, + } + + for _, c := range cs { + flag := true + got := RemoveDupString(c.val) + if len(got) == len(c.expect) { + sort.Strings(got) + sort.Strings(c.expect) + for i := 0; i < len(got); i++ { + if got[i] != c.expect[i] { + flag = false + } + } + } else { + flag = false + } + if !flag { + t.Errorf("case: %s, expect: %v, get: %v", c.name, c.expect, got) + } + } +} -- 2.25.1