docker/patch/0236-docker-set-freezer.state-to-Thawed-to-increase-freez.patch

72 lines
2.9 KiB
Diff
Raw Permalink Normal View History

2023-10-12 15:12:06 +08:00
From 2a6d6321956d0a6edbec7421357e14c01dc7f5ab Mon Sep 17 00:00:00 2001
From: chenjiankun <chenjiankun1@huawei.com>
Date: Fri, 13 Jan 2023 17:13:22 +0800
Subject: [PATCH] docker: set freezer.state to Thawed to increase freeze
chances
Running docker pause/unpause in parallel with docker exec can leave the
cgroup stuck in the FREEZING state. Periodically write Thawed to
freezer.state before retrying to increase the chances of a successful freeze.
2023-10-12 15:12:06 +08:00
An occasional short sleep before reading the state back also improves
the chances of a successful freeze on a very slow system.
---
2023-10-12 15:12:06 +08:00
components/engine/daemon/freezer/freezer.go | 28 +++++++++++++++++++++
1 file changed, 28 insertions(+)
diff --git a/components/engine/daemon/freezer/freezer.go b/components/engine/daemon/freezer/freezer.go
2023-10-12 15:12:06 +08:00
index 6df176f2f..bd45304f4 100644
--- a/components/engine/daemon/freezer/freezer.go
+++ b/components/engine/daemon/freezer/freezer.go
@@ -186,6 +186,7 @@ func (f *freezer) updateCgroup(state string) error {
timeout := time.After(30 * time.Second)
ticker := time.NewTicker(1 * time.Millisecond)
defer ticker.Stop()
+ count := 0
for {
select {
case <-timeout:
@@ -194,6 +195,26 @@ func (f *freezer) updateCgroup(state string) error {
}
return fmt.Errorf("update freezer cgroup timeout for 30s")
case <-ticker.C:
+ // As per older kernel docs (freezer-subsystem.txt before
+ // kernel commit ef9fe980c6fcc1821), if FREEZING is seen,
+ // userspace should either retry or thaw. While current
+ // kernel cgroup v1 docs no longer mention a need to retry,
+ // the kernel (tested on v5.4, Ubuntu 20.04) can't reliably
+ // freeze a cgroup while new processes keep appearing in it
+ // (either via fork/clone or by writing new PIDs to
+ // cgroup.procs).
+ //
+ // The numbers below are chosen to have a decent chance to
+ // succeed even in the worst case scenario (docker pause/unpause
+ // with parallel docker exec).
+ //
+ // Adding sleep between plain retries did not increase the chances of a
+ // successful freeze, but an occasional thaw plus a short sleep does.
+ if count++; count % 50 == 0 && state == string(configs.Frozen) {
+ writeFile(f.path, "freezer.state", string(configs.Thawed))
+ time.Sleep(10 * time.Millisecond)
+ }
+
// In case this loop does not exit because it doesn't get the expected
// state, let's write again this state, hoping it's going to be properly
// set this time. Otherwise, this loop could run infinitely, waiting for
2023-10-12 15:12:06 +08:00
@@ -201,6 +222,13 @@ func (f *freezer) updateCgroup(state string) error {
if err := writeFile(f.path, "freezer.state", state); err != nil {
return fmt.Errorf("cannot write freezer.state for %#v", err)
}
+ if count%25 == 24 {
+ // Occasional short sleep before reading
+ // the state back also improves the chances to
+ // succeed in freezing in case of a very slow
+ // system.
+ time.Sleep(10 * time.Microsecond)
+ }
newState, err := readFile(f.path, "freezer.state")
if err != nil {
return fmt.Errorf("read freezer.state failed after write: %v", err)
--
2023-10-12 15:12:06 +08:00
2.33.0