runc/patch/0042-runc-libct-fix-a-race-with-systemd-removal.patch
2023-06-21 16:27:12 +08:00

98 lines
3.7 KiB
Diff

From 45abd12f084c62f3fbd24ab0fb0d3c1e3edf6ac7 Mon Sep 17 00:00:00 2001
From: Kir Kolyshkin <kolyshkin@gmail.com>
Date: Tue, 4 Apr 2023 16:59:43 -0700
Subject: [PATCH] libct: fix a race with systemd removal
For a previous attempt to fix that (and added test cases), see commit
9087f2e827d971.
Alas, it's not always working because of cgroup directory TOCTOU.
To solve this and avoid the race, add an error _after_ the operation.
Implement it as a method that ignores the error that should be ignored.
Instead of currentStatus(), use faster runType(), since we are not
interested in Paused status here.
For Processes(), remove the pre-op check, and only use it after getting
an error, making the non-error path more straightforward.
For Signal(), add a second check after getting an error. The first check
is left as is because signalAllProcesses might print a warning if the
cgroup does not exist, and we'd like to avoid that.
This should fix an occasional failure like this one:
not ok 84 kill detached busybox
# (in test file tests/integration/kill.bats, line 27)
# `[ "$status" -eq 0 ]' failed
....
# runc kill test_busybox KILL (status=0):
# runc kill -a test_busybox 0 (status=1):
# time="2023-04-04T18:24:27Z" level=error msg="lstat /sys/fs/cgroup/devices/system.slice/runc-test_busybox.scope: no such file or directory"
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
---
libcontainer/container_linux.go | 33 +++++++++++++++++++++------------
1 file changed, 21 insertions(+), 12 deletions(-)
diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
index 1a210fa2..1189e5af 100644
--- a/libcontainer/container_linux.go
+++ b/libcontainer/container_linux.go
@@ -146,19 +146,27 @@ func (c *linuxContainer) OCIState() (*specs.State, error) {
return c.currentOCIState()
}
-func (c *linuxContainer) Processes() ([]int, error) {
- var pids []int
- status, err := c.currentStatus()
- if err != nil {
- return pids, err
+// ignoreCgroupError filters out cgroup-related errors that can be ignored,
+// because the container is stopped and its cgroup is gone.
+func (c *linuxContainer) ignoreCgroupError(err error) error {
+ if err == nil {
+ return nil
}
- // for systemd cgroup, the unit's cgroup path will be auto removed if container's all processes exited
- if status == Stopped && !c.cgroupManager.Exists() {
- return pids, nil
+ if errors.Is(err, os.ErrNotExist) && c.runType() == Stopped && !c.cgroupManager.Exists() {
+ return nil
}
+ return err
+}
- pids, err = c.cgroupManager.GetAllPids()
- if err != nil {
+// Processes returns the PIDs inside this container. The PIDs are in the
+// namespace of the calling process.
+//
+// Some of the returned PIDs may no longer refer to processes in the container,
+// unless the container state is PAUSED in which case every PID in the slice is
+// valid.
+func (c *linuxContainer) Processes() ([]int, error) {
+ pids, err := c.cgroupManager.GetAllPids()
+ if err = c.ignoreCgroupError(err); err != nil {
return nil, fmt.Errorf("unable to get all container pids: %w", err)
}
return pids, nil
@@ -382,11 +390,12 @@ func (c *linuxContainer) Signal(s os.Signal, all bool) error {
return err
}
if all {
- // for systemd cgroup, the unit's cgroup path will be auto removed if container's all processes exited
if status == Stopped && !c.cgroupManager.Exists() {
+ // Avoid calling signalAllProcesses which may print
+ // a warning trying to freeze a non-existing cgroup.
return nil
}
- return signalAllProcesses(c.cgroupManager, s)
+ return c.ignoreCgroupError(signalAllProcesses(c.cgroupManager, s))
}
// to avoid a PID reuse attack
if status == Running || status == Created || status == Paused {
--
2.33.0