!138 runc:fix /sys/fs/cgroup mounts and Prohibit /proc and /sys to be symlinks
From: @zhong-jiawei-1 Reviewed-by: @Vanient, @duguhaotian Signed-off-by: @duguhaotian
This commit is contained in:
commit
41640547d3
@ -1 +1 @@
|
||||
bc3b1abe72220ea5a0a8390f174f1db0b76888f6
|
||||
d8a55778488f67c6f7f58db882e43164feda5ca0
|
||||
|
||||
121
patch/0038-runc-rootless-fix-sys-fs-cgroup-mounts.patch
Normal file
121
patch/0038-runc-rootless-fix-sys-fs-cgroup-mounts.patch
Normal file
@ -0,0 +1,121 @@
|
||||
From fd61dbb032e526bd323702d954520669761647bb Mon Sep 17 00:00:00 2001
|
||||
From: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
|
||||
Date: Mon, 26 Dec 2022 12:04:26 +0900
|
||||
Subject: [PATCH] rootless: fix /sys/fs/cgroup mounts
|
||||
|
||||
It was found that rootless runc makes `/sys/fs/cgroup` writable in following conditons:
|
||||
|
||||
1. when runc is executed inside the user namespace, and the config.json does not specify the cgroup namespace to be unshared
|
||||
(e.g.., `(docker|podman|nerdctl) run --cgroupns=host`, with Rootless Docker/Podman/nerdctl)
|
||||
2. or, when runc is executed outside the user namespace, and `/sys` is mounted with `rbind, ro`
|
||||
(e.g., `runc spec --rootless`; this condition is very rare)
|
||||
|
||||
A container may gain the write access to user-owned cgroup hierarchy `/sys/fs/cgroup/user.slice/...` on the host.
|
||||
Other users's cgroup hierarchies are not affected.
|
||||
|
||||
To fix the issue, this commit does:
|
||||
1. Remount `/sys/fs/cgroup` to apply `MS_RDONLY` when it is being bind-mounted
|
||||
2. Mask `/sys/fs/cgroup` when the bind source is unavailable
|
||||
|
||||
Fix CVE-2023-25809 (GHSA-m8cg-xc2p-r3fc)
|
||||
|
||||
Co-authored-by: Kir Kolyshkin <kolyshkin@gmail.com>
|
||||
Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
|
||||
---
|
||||
libcontainer/rootfs_linux.go | 53 ++++++++++++++++++++++-------------
|
||||
tests/integration/mounts.bats | 17 +++++++++++
|
||||
2 files changed, 51 insertions(+), 19 deletions(-)
|
||||
|
||||
diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
|
||||
index 280a6332..ec14f97e 100644
|
||||
--- a/libcontainer/rootfs_linux.go
|
||||
+++ b/libcontainer/rootfs_linux.go
|
||||
@@ -334,26 +334,41 @@ func mountCgroupV2(m *configs.Mount, c *mountConfig) error {
|
||||
if err := os.MkdirAll(dest, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
- return utils.WithProcfd(c.root, m.Destination, func(procfd string) error {
|
||||
- if err := mount(m.Source, m.Destination, procfd, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
|
||||
- // when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
|
||||
- if errors.Is(err, unix.EPERM) || errors.Is(err, unix.EBUSY) {
|
||||
- src := fs2.UnifiedMountpoint
|
||||
- if c.cgroupns && c.cgroup2Path != "" {
|
||||
- // Emulate cgroupns by bind-mounting
|
||||
- // the container cgroup path rather than
|
||||
- // the whole /sys/fs/cgroup.
|
||||
- src = c.cgroup2Path
|
||||
- }
|
||||
- err = mount(src, m.Destination, procfd, "", uintptr(m.Flags)|unix.MS_BIND, "")
|
||||
- if c.rootlessCgroups && errors.Is(err, unix.ENOENT) {
|
||||
- err = nil
|
||||
- }
|
||||
- }
|
||||
- return err
|
||||
- }
|
||||
- return nil
|
||||
+ err = utils.WithProcfd(c.root, m.Destination, func(procfd string) error {
|
||||
+ return mount(m.Source, m.Destination, procfd, "cgroup2", uintptr(m.Flags), m.Data)
|
||||
})
|
||||
+ if err == nil || !(errors.Is(err, unix.EPERM) || errors.Is(err, unix.EBUSY)) {
|
||||
+ return err
|
||||
+ }
|
||||
+
|
||||
+ // When we are in UserNS but CgroupNS is not unshared, we cannot mount
|
||||
+ // cgroup2 (#2158), so fall back to bind mount.
|
||||
+ bindM := &configs.Mount{
|
||||
+ Device: "bind",
|
||||
+ Source: fs2.UnifiedMountpoint,
|
||||
+ Destination: m.Destination,
|
||||
+ Flags: unix.MS_BIND | m.Flags,
|
||||
+ PropagationFlags: m.PropagationFlags,
|
||||
+ }
|
||||
+ if c.cgroupns && c.cgroup2Path != "" {
|
||||
+ // Emulate cgroupns by bind-mounting the container cgroup path
|
||||
+ // rather than the whole /sys/fs/cgroup.
|
||||
+ bindM.Source = c.cgroup2Path
|
||||
+ }
|
||||
+ // mountToRootfs() handles remounting for MS_RDONLY.
|
||||
+ // No need to set c.fd here, because mountToRootfs() calls utils.WithProcfd() by itself in mountPropagate().
|
||||
+ err = mountToRootfs(bindM, c)
|
||||
+ if c.rootlessCgroups && errors.Is(err, unix.ENOENT) {
|
||||
+ // ENOENT (for `src = c.cgroup2Path`) happens when rootless runc is being executed
|
||||
+ // outside the userns+mountns.
|
||||
+ //
|
||||
+ // Mask `/sys/fs/cgroup` to ensure it is read-only, even when `/sys` is mounted
|
||||
+ // with `rbind,ro` (`runc spec --rootless` produces `rbind,ro` for `/sys`).
|
||||
+ err = utils.WithProcfd(c.root, m.Destination, func(procfd string) error {
|
||||
+ return maskPath(procfd, c.label)
|
||||
+ })
|
||||
+ }
|
||||
+ return err
|
||||
}
|
||||
|
||||
func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) {
|
||||
diff --git a/tests/integration/mounts.bats b/tests/integration/mounts.bats
|
||||
index 1ec675ac..1e72c5b1 100644
|
||||
--- a/tests/integration/mounts.bats
|
||||
+++ b/tests/integration/mounts.bats
|
||||
@@ -63,3 +63,20 @@ function teardown() {
|
||||
runc run test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
+
|
||||
+# https://github.com/opencontainers/runc/security/advisories/GHSA-m8cg-xc2p-r3fc
|
||||
+@test "runc run [ro /sys/fs/cgroup mount]" {
|
||||
+ # With cgroup namespace
|
||||
+ update_config '.process.args |= ["sh", "-euc", "for f in `grep /sys/fs/cgroup /proc/mounts | awk \"{print \\\\$2}\"| uniq`; do grep -w $f /proc/mounts | tail -n1; done"]'
|
||||
+ runc run test_busybox
|
||||
+ [ "$status" -eq 0 ]
|
||||
+ [ "${#lines[@]}" -ne 0 ]
|
||||
+ for line in "${lines[@]}"; do [[ "${line}" == *'ro,'* ]]; done
|
||||
+
|
||||
+ # Without cgroup namespace
|
||||
+ update_config '.linux.namespaces -= [{"type": "cgroup"}]'
|
||||
+ runc run test_busybox
|
||||
+ [ "$status" -eq 0 ]
|
||||
+ [ "${#lines[@]}" -ne 0 ]
|
||||
+ for line in "${lines[@]}"; do [[ "${line}" == *'ro,'* ]]; done
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
113
patch/0039-runc-Prohibit-proc-and-sys-to-be-symlinks.patch
Normal file
113
patch/0039-runc-Prohibit-proc-and-sys-to-be-symlinks.patch
Normal file
@ -0,0 +1,113 @@
|
||||
From 52559766c5298688a8302180bf50b002623776d9 Mon Sep 17 00:00:00 2001
|
||||
From: Kir Kolyshkin <kolyshkin@gmail.com>
|
||||
Date: Thu, 16 Mar 2023 14:35:50 -0700
|
||||
Subject: [PATCH] Prohibit /proc and /sys to be symlinks
|
||||
|
||||
Commit 3291d66b9844 introduced a check for /proc and /sys, making sure
|
||||
the destination (dest) is a directory (and not e.g. a symlink).
|
||||
|
||||
Later, a hunk from commit 0ca91f44f switched from using filepath.Join
|
||||
to SecureJoin for dest. As SecureJoin follows and resolves symlinks,
|
||||
the check whether dest is a symlink no longer works.
|
||||
|
||||
To fix, do the check without/before using SecureJoin.
|
||||
|
||||
Add integration tests to make sure we won't regress.
|
||||
|
||||
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
|
||||
(cherry picked from commit 0d72adf96dda1b687815bf89bb245b937a2f603c)
|
||||
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
|
||||
---
|
||||
libcontainer/rootfs_linux.go | 29 ++++++++++++++++++++---------
|
||||
tests/integration/mask.bats | 19 +++++++++++++++++++
|
||||
2 files changed, 39 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
|
||||
index ec14f97e..8ce09f6f 100644
|
||||
--- a/libcontainer/rootfs_linux.go
|
||||
+++ b/libcontainer/rootfs_linux.go
|
||||
@@ -418,25 +418,26 @@ func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) {
|
||||
|
||||
func mountToRootfs(m *configs.Mount, c *mountConfig) error {
|
||||
rootfs := c.root
|
||||
- mountLabel := c.label
|
||||
- mountFd := c.fd
|
||||
- dest, err := securejoin.SecureJoin(rootfs, m.Destination)
|
||||
- if err != nil {
|
||||
- return err
|
||||
- }
|
||||
|
||||
+ // procfs and sysfs are special because we need to ensure they are actually
|
||||
+ // mounted on a specific path in a container without any funny business.
|
||||
switch m.Device {
|
||||
case "proc", "sysfs":
|
||||
// If the destination already exists and is not a directory, we bail
|
||||
- // out This is to avoid mounting through a symlink or similar -- which
|
||||
+ // out. This is to avoid mounting through a symlink or similar -- which
|
||||
// has been a "fun" attack scenario in the past.
|
||||
// TODO: This won't be necessary once we switch to libpathrs and we can
|
||||
// stop all of these symlink-exchange attacks.
|
||||
+ dest := filepath.Clean(m.Destination)
|
||||
+ if !strings.HasPrefix(dest, rootfs) {
|
||||
+ // Do not use securejoin as it resolves symlinks.
|
||||
+ dest = filepath.Join(rootfs, dest)
|
||||
+ }
|
||||
if fi, err := os.Lstat(dest); err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
- } else if fi.Mode()&os.ModeDir == 0 {
|
||||
+ } else if !fi.IsDir() {
|
||||
return fmt.Errorf("filesystem %q must be mounted on ordinary directory", m.Device)
|
||||
}
|
||||
if strings.HasPrefix(m.Destination, "/proc/sys/") {
|
||||
@@ -445,8 +446,18 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error {
|
||||
if err := os.MkdirAll(dest, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
- // Selinux kernels do not support labeling of /proc or /sys
|
||||
+ // Selinux kernels do not support labeling of /proc or /sys.
|
||||
return mountPropagate(m, rootfs, "", nil)
|
||||
+ }
|
||||
+
|
||||
+ mountLabel := c.label
|
||||
+ mountFd := c.fd
|
||||
+ dest, err := securejoin.SecureJoin(rootfs, m.Destination)
|
||||
+ if err != nil {
|
||||
+ return err
|
||||
+ }
|
||||
+
|
||||
+ switch m.Device {
|
||||
case "mqueue":
|
||||
if err := os.MkdirAll(dest, 0o755); err != nil {
|
||||
return err
|
||||
diff --git a/tests/integration/mask.bats b/tests/integration/mask.bats
|
||||
index b5f29675..272c879c 100644
|
||||
--- a/tests/integration/mask.bats
|
||||
+++ b/tests/integration/mask.bats
|
||||
@@ -56,3 +56,22 @@ function teardown() {
|
||||
[ "$status" -eq 1 ]
|
||||
[[ "${output}" == *"Operation not permitted"* ]]
|
||||
}
|
||||
+
|
||||
+@test "mask paths [prohibit symlink /proc]" {
|
||||
+ ln -s /symlink rootfs/proc
|
||||
+ runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
|
||||
+ [ "$status" -eq 1 ]
|
||||
+ [[ "${output}" == *"must be mounted on ordinary directory"* ]]
|
||||
+}
|
||||
+
|
||||
+@test "mask paths [prohibit symlink /sys]" {
|
||||
+ # In rootless containers, /sys is a bind mount not a real sysfs.
|
||||
+ requires root
|
||||
+
|
||||
+ ln -s /symlink rootfs/sys
|
||||
+ runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
|
||||
+ [ "$status" -eq 1 ]
|
||||
+ # On cgroup v1, this may fail before checking if /sys is a symlink,
|
||||
+ # so we merely check that it fails, and do not check the exact error
|
||||
+ # message like for /proc above.
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
|
||||
Name: docker-runc
|
||||
Version: 1.1.3
|
||||
Release: 13
|
||||
Release: 14
|
||||
Summary: runc is a CLI tool for spawning and running containers according to the OCI specification.
|
||||
|
||||
License: ASL 2.0
|
||||
@ -58,6 +58,12 @@ install -p -m 755 runc $RPM_BUILD_ROOT/%{_bindir}/runc
|
||||
%{_bindir}/runc
|
||||
|
||||
%changelog
|
||||
* Mon Apr 3 2023 zhongjiawei<zhongjiawei1@huawei.com> - 1.1.3-14
|
||||
- Type:bugfix
|
||||
- CVE:NA
|
||||
- SUG:NA
|
||||
- DESC:fix rootless /sys/fs/cgroup mounts bug and Prohibit /proc and /sys to be symlinks
|
||||
|
||||
* Tue Mar 21 2023 zhongjiawei<zhongjiawei1@huawei.com> - 1.1.3-13
|
||||
- Type:bugfix
|
||||
- CVE:NA
|
||||
|
||||
@ -33,3 +33,5 @@ patch/0032-runc-make-runc-spec-compatible-1.0.0.rc3.patch
|
||||
patch/0033-add-loongarch-support-for-libcontainer.patch
|
||||
patch/0036-runc-libcontainer-skip-chown-of-dev-null-caused-by-fd-red.patch
|
||||
patch/0037-runc-Fixed-init-state-error-variable.patch
|
||||
patch/0038-runc-rootless-fix-sys-fs-cgroup-mounts.patch
|
||||
patch/0039-runc-Prohibit-proc-and-sys-to-be-symlinks.patch
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user