From 7a175514804e423257225a33fd7788ddd621e567 Mon Sep 17 00:00:00 2001 From: jingrui Date: Tue, 3 Sep 2019 16:10:19 +0800 Subject: [PATCH] docker: fix fd leak on reboot The original DB cleanup runs too late: by that point docker has already opened the DB files. Move the cleanup to the dockerd start entry point. Change-Id: I462c6b2fe44a0447fd5cc111f25b2e26b7488dc2 Signed-off-by: jingrui --- components/engine/cmd/dockerd/daemon.go | 35 ++++++++++++- components/engine/daemon/daemon.go | 68 ------------------------- 2 files changed, 34 insertions(+), 69 deletions(-) diff --git a/components/engine/cmd/dockerd/daemon.go b/components/engine/cmd/dockerd/daemon.go index 78cd41ac59..1981175a4a 100644 --- a/components/engine/cmd/dockerd/daemon.go +++ b/components/engine/cmd/dockerd/daemon.go @@ -4,6 +4,7 @@ import ( "context" "crypto/tls" "fmt" + "io/ioutil" "os" "path/filepath" "runtime" @@ -71,6 +72,38 @@ func NewDaemonCli() *DaemonCli { return &DaemonCli{} } +func cleanupLocalDB(db string) { + _, err := os.Stat(db) + if err == nil { + err = os.Remove(db) + logrus.Infof("cleanup DB %s error=%v", db, err) + } +} + +// DB files may be corrupted by an abnormal poweroff but can be rebuilt at run time, +// so we remove them when the OS starts to keep the daemon from failing to start. 
+func cleanupLocalDBs(run, root string) { + // check db lock is exist, do nothing if file is existed + dbLockPath := filepath.Join(run, "dblock") + _, err := os.Stat(dbLockPath) + if err == nil { + return + } + if !os.IsNotExist(err) { + logrus.Errorf("stat dblock failed %v", err) + return + } + ioutil.WriteFile(dbLockPath, []byte{}, 0600) + cleanupLocalDB(filepath.Join(root, "containerd/daemon/io.containerd.metadata.v1.bolt/meta.db")) + cleanupLocalDB(filepath.Join(root, "builder/fscache.db")) + cleanupLocalDB(filepath.Join(root, "volumes/metadata.db")) + cleanupLocalDB(filepath.Join(root, "network/files/local-kv.db")) + cleanupLocalDB(filepath.Join(root, "accelerator/accel.db")) + cleanupLocalDB(filepath.Join(root, "buildkit/metadata.db")) + cleanupLocalDB(filepath.Join(root, "buildkit/cache.db")) + cleanupLocalDB(filepath.Join(root, "buildkit/snapshots.db")) +} + func (cli *DaemonCli) start(opts *daemonOptions) (err error) { stopc := make(chan bool) defer close(stopc) @@ -151,7 +184,7 @@ func (cli *DaemonCli) start(opts *daemonOptions) (err error) { return fmt.Errorf("Failed to generate containerd options: %v", err) } - daemon.CleanupContainerdDBs(cli.Config.ExecRoot, cli.Config.Root) + cleanupLocalDBs(cli.Config.ExecRoot, cli.Config.Root) r, err := supervisor.Start(ctx, filepath.Join(cli.Config.Root, "containerd"), filepath.Join(cli.Config.ExecRoot, "containerd"), opts...) 
if err != nil { cancel() diff --git a/components/engine/daemon/daemon.go b/components/engine/daemon/daemon.go index 01351cc544..b9af915ef8 100644 --- a/components/engine/daemon/daemon.go +++ b/components/engine/daemon/daemon.go @@ -514,8 +514,6 @@ func (daemon *Daemon) restore() error { logrus.Errorf("removeRedundantMounts failed %v", err) } - daemon.cleanupLocalDBs(daemon.configStore.ExecRoot, daemon.configStore.Root) - containerIDs := make(map[string]struct{}) for cid, _ := range containers { containerIDs[cid] = struct{}{} @@ -616,72 +614,6 @@ func (daemon *Daemon) restore() error { return nil } -func cleanupLocalDB(db string) { - _, err := os.Stat(db) - if err == nil { - err = os.Remove(db) - logrus.Infof("cleanup DB %s error=%v", db, err) - } -} - -func CleanupContainerdDBs(run, root string) { - // check db lock is exist, do nothing if file is existed - dbLockPath := filepath.Join(run, "dblock") - _, err := os.Stat(dbLockPath) - if err == nil { - return - } - if !os.IsNotExist(err) { - logrus.Errorf("stat DB dblock failed %v", err) - return - } - cleanupLocalDB(filepath.Join(root, "containerd/daemon/io.containerd.metadata.v1.bolt/meta.db")) -} - -// DB files may corrupted on exception poweroff but can be rebuild at run time, -// so we can remove DB files on OS starts avoid daemon can not startup. 
-func (daemon *Daemon) cleanupLocalDBs(run, root string) { - // check db lock is exist, do nothing if file is existed - dbLockPath := filepath.Join(run, "dblock") - _, err := os.Stat(dbLockPath) - if err == nil { - return - } - if !os.IsNotExist(err) { - logrus.Errorf("stat dblock failed %v", err) - return - } - ioutil.WriteFile(dbLockPath, []byte{}, 0600) - - removeAllDB := func() { - cleanupLocalDB(filepath.Join(root, "builder/fscache.db")) - cleanupLocalDB(filepath.Join(root, "volumes/metadata.db")) - cleanupLocalDB(filepath.Join(root, "network/files/local-kv.db")) - cleanupLocalDB(filepath.Join(root, "accelerator/accel.db")) - cleanupLocalDB(filepath.Join(root, "buildkit/metadata.db")) - cleanupLocalDB(filepath.Join(root, "buildkit/cache.db")) - cleanupLocalDB(filepath.Join(root, "buildkit/snapshots.db")) - } - - if daemon.containers == nil { - logrus.Warnf("nil containers, cleanup local DB after OS start ...") - removeAllDB() - return - } - - ls, err := daemon.Containers(&types.ContainerListOptions{}) - if err != nil { - logrus.Errorf("list containers failed %v", err) - return - } - - if len(ls) == 0 { - logrus.Warnf("no running containers, cleanup local DB after OS start ...") - removeAllDB() - return - } -} - // RestartSwarmContainers restarts any autostart container which has a // swarm endpoint. func (daemon *Daemon) RestartSwarmContainers() { -- 2.17.1