122 lines
4.9 KiB
Diff
122 lines
4.9 KiB
Diff
From fd61dbb032e526bd323702d954520669761647bb Mon Sep 17 00:00:00 2001
|
|
From: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
|
|
Date: Mon, 26 Dec 2022 12:04:26 +0900
|
|
Subject: [PATCH] rootless: fix /sys/fs/cgroup mounts
|
|
|
|
It was found that rootless runc makes `/sys/fs/cgroup` writable in the following conditions:
|
|
|
|
1. when runc is executed inside the user namespace, and the config.json does not specify the cgroup namespace to be unshared
|
|
(e.g., `(docker|podman|nerdctl) run --cgroupns=host`, with Rootless Docker/Podman/nerdctl)
|
|
2. or, when runc is executed outside the user namespace, and `/sys` is mounted with `rbind, ro`
|
|
(e.g., `runc spec --rootless`; this condition is very rare)
|
|
|
|
A container may gain write access to the user-owned cgroup hierarchy `/sys/fs/cgroup/user.slice/...` on the host.
|
|
Other users' cgroup hierarchies are not affected.
|
|
|
|
To fix the issue, this commit does:
|
|
1. Remount `/sys/fs/cgroup` to apply `MS_RDONLY` when it is being bind-mounted
|
|
2. Mask `/sys/fs/cgroup` when the bind source is unavailable
|
|
|
|
Fix CVE-2023-25809 (GHSA-m8cg-xc2p-r3fc)
|
|
|
|
Co-authored-by: Kir Kolyshkin <kolyshkin@gmail.com>
|
|
Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
|
|
---
|
|
libcontainer/rootfs_linux.go | 53 ++++++++++++++++++++++-------------
|
|
tests/integration/mounts.bats | 17 +++++++++++
|
|
2 files changed, 51 insertions(+), 19 deletions(-)
|
|
|
|
diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
|
|
index 280a6332..ec14f97e 100644
|
|
--- a/libcontainer/rootfs_linux.go
|
|
+++ b/libcontainer/rootfs_linux.go
|
|
@@ -334,26 +334,41 @@ func mountCgroupV2(m *configs.Mount, c *mountConfig) error {
|
|
if err := os.MkdirAll(dest, 0o755); err != nil {
|
|
return err
|
|
}
|
|
- return utils.WithProcfd(c.root, m.Destination, func(procfd string) error {
|
|
- if err := mount(m.Source, m.Destination, procfd, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
|
|
- // when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
|
|
- if errors.Is(err, unix.EPERM) || errors.Is(err, unix.EBUSY) {
|
|
- src := fs2.UnifiedMountpoint
|
|
- if c.cgroupns && c.cgroup2Path != "" {
|
|
- // Emulate cgroupns by bind-mounting
|
|
- // the container cgroup path rather than
|
|
- // the whole /sys/fs/cgroup.
|
|
- src = c.cgroup2Path
|
|
- }
|
|
- err = mount(src, m.Destination, procfd, "", uintptr(m.Flags)|unix.MS_BIND, "")
|
|
- if c.rootlessCgroups && errors.Is(err, unix.ENOENT) {
|
|
- err = nil
|
|
- }
|
|
- }
|
|
- return err
|
|
- }
|
|
- return nil
|
|
+ err = utils.WithProcfd(c.root, m.Destination, func(procfd string) error {
|
|
+ return mount(m.Source, m.Destination, procfd, "cgroup2", uintptr(m.Flags), m.Data)
|
|
})
|
|
+ if err == nil || !(errors.Is(err, unix.EPERM) || errors.Is(err, unix.EBUSY)) {
|
|
+ return err
|
|
+ }
|
|
+
|
|
+ // When we are in UserNS but CgroupNS is not unshared, we cannot mount
|
|
+ // cgroup2 (#2158), so fall back to bind mount.
|
|
+ bindM := &configs.Mount{
|
|
+ Device: "bind",
|
|
+ Source: fs2.UnifiedMountpoint,
|
|
+ Destination: m.Destination,
|
|
+ Flags: unix.MS_BIND | m.Flags,
|
|
+ PropagationFlags: m.PropagationFlags,
|
|
+ }
|
|
+ if c.cgroupns && c.cgroup2Path != "" {
|
|
+ // Emulate cgroupns by bind-mounting the container cgroup path
|
|
+ // rather than the whole /sys/fs/cgroup.
|
|
+ bindM.Source = c.cgroup2Path
|
|
+ }
|
|
+ // mountToRootfs() handles remounting for MS_RDONLY.
|
|
+ // No need to set c.fd here, because mountToRootfs() calls utils.WithProcfd() by itself in mountPropagate().
|
|
+ err = mountToRootfs(bindM, c)
|
|
+ if c.rootlessCgroups && errors.Is(err, unix.ENOENT) {
|
|
+ // ENOENT (for `src = c.cgroup2Path`) happens when rootless runc is being executed
|
|
+ // outside the userns+mountns.
|
|
+ //
|
|
+ // Mask `/sys/fs/cgroup` to ensure it is read-only, even when `/sys` is mounted
|
|
+ // with `rbind,ro` (`runc spec --rootless` produces `rbind,ro` for `/sys`).
|
|
+ err = utils.WithProcfd(c.root, m.Destination, func(procfd string) error {
|
|
+ return maskPath(procfd, c.label)
|
|
+ })
|
|
+ }
|
|
+ return err
|
|
}
|
|
|
|
func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) {
|
|
diff --git a/tests/integration/mounts.bats b/tests/integration/mounts.bats
|
|
index 1ec675ac..1e72c5b1 100644
|
|
--- a/tests/integration/mounts.bats
|
|
+++ b/tests/integration/mounts.bats
|
|
@@ -63,3 +63,20 @@ function teardown() {
|
|
runc run test_busybox
|
|
[ "$status" -eq 0 ]
|
|
}
|
|
+
|
|
+# https://github.com/opencontainers/runc/security/advisories/GHSA-m8cg-xc2p-r3fc
|
|
+@test "runc run [ro /sys/fs/cgroup mount]" {
|
|
+ # With cgroup namespace
|
|
+ update_config '.process.args |= ["sh", "-euc", "for f in `grep /sys/fs/cgroup /proc/mounts | awk \"{print \\\\$2}\"| uniq`; do grep -w $f /proc/mounts | tail -n1; done"]'
|
|
+ runc run test_busybox
|
|
+ [ "$status" -eq 0 ]
|
|
+ [ "${#lines[@]}" -ne 0 ]
|
|
+ for line in "${lines[@]}"; do [[ "${line}" == *'ro,'* ]]; done
|
|
+
|
|
+ # Without cgroup namespace
|
|
+ update_config '.linux.namespaces -= [{"type": "cgroup"}]'
|
|
+ runc run test_busybox
|
|
+ [ "$status" -eq 0 ]
|
|
+ [ "${#lines[@]}" -ne 0 ]
|
|
+ for line in "${lines[@]}"; do [[ "${line}" == *'ro,'* ]]; done
|
|
+}
|
|
--
|
|
2.33.0
|
|
|