98 lines
3.7 KiB
Diff
98 lines
3.7 KiB
Diff
From 45abd12f084c62f3fbd24ab0fb0d3c1e3edf6ac7 Mon Sep 17 00:00:00 2001
|
|
From: Kir Kolyshkin <kolyshkin@gmail.com>
|
|
Date: Tue, 4 Apr 2023 16:59:43 -0700
|
|
Subject: [PATCH] libct: fix a race with systemd removal
|
|
|
|
For a previous attempt to fix that (and added test cases), see commit
|
|
9087f2e827d971.
|
|
|
|
Alas, it's not always working because of cgroup directory TOCTOU.
|
|
|
|
To solve this and avoid the race, add an error _after_ the operation.
|
|
Implement it as a method that ignores the error that should be ignored.
|
|
Instead of currentStatus(), use faster runType(), since we are not
|
|
interested in Paused status here.
|
|
|
|
For Processes(), remove the pre-op check, and only use it after getting
|
|
an error, making the non-error path more straightforward.
|
|
|
|
For Signal(), add a second check after getting an error. The first check
|
|
is left as is because signalAllProcesses might print a warning if the
|
|
cgroup does not exist, and we'd like to avoid that.
|
|
|
|
This should fix an occasional failure like this one:
|
|
|
|
not ok 84 kill detached busybox
|
|
# (in test file tests/integration/kill.bats, line 27)
|
|
# `[ "$status" -eq 0 ]' failed
|
|
....
|
|
# runc kill test_busybox KILL (status=0):
|
|
# runc kill -a test_busybox 0 (status=1):
|
|
# time="2023-04-04T18:24:27Z" level=error msg="lstat /sys/fs/cgroup/devices/system.slice/runc-test_busybox.scope: no such file or directory"
|
|
|
|
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
|
|
---
|
|
libcontainer/container_linux.go | 33 +++++++++++++++++++++------------
|
|
1 file changed, 21 insertions(+), 12 deletions(-)
|
|
|
|
diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
|
|
index 1a210fa2..1189e5af 100644
|
|
--- a/libcontainer/container_linux.go
|
|
+++ b/libcontainer/container_linux.go
|
|
@@ -146,19 +146,27 @@ func (c *linuxContainer) OCIState() (*specs.State, error) {
|
|
return c.currentOCIState()
|
|
}
|
|
|
|
-func (c *linuxContainer) Processes() ([]int, error) {
|
|
- var pids []int
|
|
- status, err := c.currentStatus()
|
|
- if err != nil {
|
|
- return pids, err
|
|
+// ignoreCgroupError filters out cgroup-related errors that can be ignored,
|
|
+// because the container is stopped and its cgroup is gone.
|
|
+func (c *linuxContainer) ignoreCgroupError(err error) error {
|
|
+ if err == nil {
|
|
+ return nil
|
|
}
|
|
- // for systemd cgroup, the unit's cgroup path will be auto removed if container's all processes exited
|
|
- if status == Stopped && !c.cgroupManager.Exists() {
|
|
- return pids, nil
|
|
+ if errors.Is(err, os.ErrNotExist) && c.runType() == Stopped && !c.cgroupManager.Exists() {
|
|
+ return nil
|
|
}
|
|
+ return err
|
|
+}
|
|
|
|
- pids, err = c.cgroupManager.GetAllPids()
|
|
- if err != nil {
|
|
+// Processes returns the PIDs inside this container. The PIDs are in the
|
|
+// namespace of the calling process.
|
|
+//
|
|
+// Some of the returned PIDs may no longer refer to processes in the container,
|
|
+// unless the container state is PAUSED in which case every PID in the slice is
|
|
+// valid.
|
|
+func (c *linuxContainer) Processes() ([]int, error) {
|
|
+ pids, err := c.cgroupManager.GetAllPids()
|
|
+ if err = c.ignoreCgroupError(err); err != nil {
|
|
return nil, fmt.Errorf("unable to get all container pids: %w", err)
|
|
}
|
|
return pids, nil
|
|
@@ -382,11 +390,12 @@ func (c *linuxContainer) Signal(s os.Signal, all bool) error {
|
|
return err
|
|
}
|
|
if all {
|
|
- // for systemd cgroup, the unit's cgroup path will be auto removed if container's all processes exited
|
|
if status == Stopped && !c.cgroupManager.Exists() {
|
|
+ // Avoid calling signalAllProcesses which may print
|
|
+ // a warning trying to freeze a non-existing cgroup.
|
|
return nil
|
|
}
|
|
- return signalAllProcesses(c.cgroupManager, s)
|
|
+ return c.ignoreCgroupError(signalAllProcesses(c.cgroupManager, s))
|
|
}
|
|
// to avoid a PID reuse attack
|
|
if status == Running || status == Created || status == Paused {
|
|
--
|
|
2.33.0
|
|
|