docker/patch/0236-docker-set-freezer.state-to-Thawed-to-increase-freez.patch
2023-10-12 15:12:06 +08:00

72 lines
2.9 KiB
Diff

From 2a6d6321956d0a6edbec7421357e14c01dc7f5ab Mon Sep 17 00:00:00 2001
From: chenjiankun <chenjiankun1@huawei.com>
Date: Fri, 13 Jan 2023 17:13:22 +0800
Subject: [PATCH] docker: set freezer.state to Thawed to increase freeze
chances
docker pause/unpause combined with a parallel docker exec can leave the
cgroup stuck in the FREEZING state; periodically write Thawed to
freezer.state to increase the chances of a successful freeze.
An occasional short sleep before reading the state back also improves
the chances of a successful freeze on a very slow system.
---
components/engine/daemon/freezer/freezer.go | 28 +++++++++++++++++++++
1 file changed, 28 insertions(+)
diff --git a/components/engine/daemon/freezer/freezer.go b/components/engine/daemon/freezer/freezer.go
index 6df176f2f..bd45304f4 100644
--- a/components/engine/daemon/freezer/freezer.go
+++ b/components/engine/daemon/freezer/freezer.go
@@ -186,6 +186,7 @@ func (f *freezer) updateCgroup(state string) error {
timeout := time.After(30 * time.Second)
ticker := time.NewTicker(1 * time.Millisecond)
defer ticker.Stop()
+ count := 0
for {
select {
case <-timeout:
@@ -194,6 +195,26 @@ func (f *freezer) updateCgroup(state string) error {
}
return fmt.Errorf("update freezer cgroup timeout for 30s")
case <-ticker.C:
+ // As per older kernel docs (freezer-subsystem.txt before
+ // kernel commit ef9fe980c6fcc1821), if FREEZING is seen,
+ // userspace should either retry or thaw. While current
+ // kernel cgroup v1 docs no longer mention a need to retry,
+ // the kernel (tested on v5.4, Ubuntu 20.04) can't reliably
+ // freeze a cgroup while new processes keep appearing in it
+ // (either via fork/clone or by writing new PIDs to
+ // cgroup.procs).
+ //
+ // The numbers below are chosen to have a decent chance to
+ // succeed even in the worst case scenario (docker pause/unpause
+ // with parallel docker exec).
+ //
+ // Adding any amount of sleep in between retries did not
+ // increase the chances of successful freeze.
+ if count++; count % 50 == 0 && state == string(configs.Frozen) {
+ writeFile(f.path, "freezer.state", string(configs.Thawed))
+ time.Sleep(10 * time.Millisecond)
+ }
+
// In case this loop does not exit because it doesn't get the expected
// state, let's write again this state, hoping it's going to be properly
// set this time. Otherwise, this loop could run infinitely, waiting for
@@ -201,6 +222,13 @@ func (f *freezer) updateCgroup(state string) error {
if err := writeFile(f.path, "freezer.state", state); err != nil {
return fmt.Errorf("cannot write freezer.state for %#v", err)
}
+ if count%25 == 24 {
+ // Occasional short sleep before reading
+ // the state back also improves the chances to
+ // succeed in freezing in case of a very slow
+ // system.
+ time.Sleep(10 * time.Microsecond)
+ }
newState, err := readFile(f.path, "freezer.state")
if err != nil {
return fmt.Errorf("read freezer.state failed after write: %v", err)
--
2.33.0