Signed-off-by: Song Zhang <zhangsong34@huawei.com> (cherry picked from commit ab4cc0729c31453147018b290e97d51db51f3c13)
357 lines
13 KiB
Diff
357 lines
13 KiB
Diff
From ef3e5b4ea8f0b2eaac05df8b16f6656aebf05998 Mon Sep 17 00:00:00 2001
|
|
From: Aleksa Sarai <cyphar@cyphar.com>
|
|
Date: Tue, 2 Jul 2024 20:58:43 +1000
|
|
Subject: [PATCH 2/2] rootfs: try to scope MkdirAll to stay inside the rootfs
|
|
|
|
While we use SecureJoin to try to make all of our target paths inside
|
|
the container safe, SecureJoin is not safe against an attacker that can
|
|
change the path after we "resolve" it.
|
|
|
|
os.MkdirAll can inadvertently follow symlinks and thus an attacker could
|
|
end up tricking runc into creating empty directories on the host (note
|
|
that the container doesn't get access to these directories, and the host
|
|
just sees empty directories). However, this could potentially cause DoS
|
|
issues by (for instance) creating a directory in a conf.d directory for
|
|
a daemon that doesn't handle subdirectories properly.
|
|
|
|
In addition, the handling for creating file bind-mounts did a plain
|
|
open(O_CREAT) on the SecureJoin'd path, which is even more obviously
|
|
unsafe (luckily we didn't use O_TRUNC, or this bug could've allowed an
|
|
attacker to cause data loss...). Regardless of the symlink issue,
|
|
opening an untrusted file could result in a DoS if the file is a hung
|
|
tty or some other "nasty" file. We can use mknodat to safely create a
|
|
regular file without opening anything anyway (O_CREAT|O_EXCL would also
|
|
work but it makes the logic a bit more complicated, and we don't want to
|
|
open the file for any particular reason anyway).
|
|
|
|
libpathrs[1] is the long-term solution for these kinds of problems, but
|
|
for now we can patch this particular issue by creating a more restricted
|
|
MkdirAll that refuses to resolve symlinks and does the creation using
|
|
file descriptors. This is loosely based on a more secure version that
|
|
filepath-securejoin now has[2] and will be added to libpathrs soon[3].
|
|
|
|
[1]: https://github.com/openSUSE/libpathrs
|
|
[2]: https://github.com/cyphar/filepath-securejoin/releases/tag/v0.3.0
|
|
[3]: https://github.com/openSUSE/libpathrs/issues/10
|
|
|
|
Fixes: CVE-2024-45310
|
|
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
|
|
---
|
|
libcontainer/mount_linux.go | 18 +++++
|
|
libcontainer/rootfs_linux.go | 33 ++++++---
|
|
libcontainer/system/linux.go | 41 +++++++++++
|
|
libcontainer/utils/utils_unix.go | 112 +++++++++++++++++++++++++++++++
|
|
4 files changed, 193 insertions(+), 11 deletions(-)
|
|
|
|
diff --git a/libcontainer/mount_linux.go b/libcontainer/mount_linux.go
|
|
index 5f49de9..948b6c0 100644
|
|
--- a/libcontainer/mount_linux.go
|
|
+++ b/libcontainer/mount_linux.go
|
|
@@ -1,6 +1,7 @@
|
|
package libcontainer
|
|
|
|
import (
|
|
+ "io/fs"
|
|
"strconv"
|
|
|
|
"golang.org/x/sys/unix"
|
|
@@ -81,3 +82,20 @@ func unmount(target string, flags int) error {
|
|
}
|
|
return nil
|
|
}
|
|
+
|
|
+// syscallMode returns the syscall-specific mode bits from Go's portable mode bits.
|
|
+// Copy from https://cs.opensource.google/go/go/+/refs/tags/go1.20.7:src/os/file_posix.go;l=61-75
|
|
+func syscallMode(i fs.FileMode) (o uint32) {
|
|
+ o |= uint32(i.Perm())
|
|
+ if i&fs.ModeSetuid != 0 {
|
|
+ o |= unix.S_ISUID
|
|
+ }
|
|
+ if i&fs.ModeSetgid != 0 {
|
|
+ o |= unix.S_ISGID
|
|
+ }
|
|
+ if i&fs.ModeSticky != 0 {
|
|
+ o |= unix.S_ISVTX
|
|
+ }
|
|
+ // No mapping for Go's ModeTemporary (plan9 only).
|
|
+ return
|
|
+}
|
|
diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
|
|
index ea554d3..4678c76 100644
|
|
--- a/libcontainer/rootfs_linux.go
|
|
+++ b/libcontainer/rootfs_linux.go
|
|
@@ -254,7 +254,7 @@ func mountCgroupV1(m *configs.Mount, c *mountConfig) error {
|
|
if c.cgroupns {
|
|
subsystemPath := filepath.Join(c.root, b.Destination)
|
|
subsystemName := filepath.Base(b.Destination)
|
|
- if err := os.MkdirAll(subsystemPath, 0o755); err != nil {
|
|
+ if err := utils.MkdirAllInRoot(c.root, subsystemPath, 0o755); err != nil {
|
|
return err
|
|
}
|
|
if err := utils.WithProcfd(c.root, b.Destination, func(procfd string) error {
|
|
@@ -383,7 +383,7 @@ func createMountpoint(rootfs string, m *configs.Mount, mountFd *int, source stri
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
- if err := checkProcMount(rootfs, dest, m, source); err != nil {
|
|
+ if err := checkProcMount(rootfs, dest, source); err != nil {
|
|
return "", fmt.Errorf("check proc-safety of %s mount: %w", m.Destination, err)
|
|
}
|
|
|
|
@@ -407,15 +407,26 @@ func createMountpoint(rootfs string, m *configs.Mount, mountFd *int, source stri
|
|
return "", fmt.Errorf("%w: file bind mount over rootfs", errRootfsToFile)
|
|
}
|
|
// Make the parent directory.
|
|
- if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
|
|
+ destDir, destBase := filepath.Split(dest)
|
|
+ destDirFd, err := utils.MkdirAllInRootOpen(rootfs, destDir, 0o755)
|
|
+ if err != nil {
|
|
return "", fmt.Errorf("make parent dir of file bind-mount: %w", err)
|
|
}
|
|
- // Make the target file.
|
|
- f, err := os.OpenFile(dest, os.O_CREATE, 0o755)
|
|
- if err != nil {
|
|
- return "", fmt.Errorf("create target of file bind-mount: %w", err)
|
|
+ defer destDirFd.Close()
|
|
+ // Make the target file. We want to avoid opening any file that is
|
|
+ // already there because it could be a "bad" file like an invalid
|
|
+ // device or hung tty that might cause a DoS, so we use mknodat.
|
|
+ // destBase does not contain any "/" components, and mknodat does
|
|
+ // not follow trailing symlinks, so we can safely just call mknodat
|
|
+ // here.
|
|
+ if err := unix.Mknodat(int(destDirFd.Fd()), destBase, unix.S_IFREG|0o644, 0); err != nil {
|
|
+ // If we get EEXIST, there was already an inode there and
|
|
+ // we can consider that a success.
|
|
+ if !errors.Is(err, unix.EEXIST) {
|
|
+ err = &os.PathError{Op: "mknod regular file", Path: dest, Err: err}
|
|
+ return "", fmt.Errorf("create target of file bind-mount: %w", err)
|
|
+ }
|
|
}
|
|
- _ = f.Close()
|
|
// Nothing left to do.
|
|
return dest, nil
|
|
}
|
|
@@ -434,7 +445,7 @@ func createMountpoint(rootfs string, m *configs.Mount, mountFd *int, source stri
|
|
}
|
|
}
|
|
|
|
- if err := os.MkdirAll(dest, 0o755); err != nil {
|
|
+ if err := utils.MkdirAllInRoot(rootfs, dest, 0o755); err != nil {
|
|
return "", err
|
|
}
|
|
return dest, nil
|
|
@@ -467,7 +478,7 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error {
|
|
if strings.HasPrefix(m.Destination, "/proc/sys/") {
|
|
return nil
|
|
}
|
|
- if err := os.MkdirAll(dest, 0o755); err != nil {
|
|
+ if err := utils.MkdirAllInRoot(rootfs, dest, 0o755); err != nil {
|
|
return err
|
|
}
|
|
// Selinux kernels do not support labeling of /proc or /sys.
|
|
@@ -735,7 +746,7 @@ func createDeviceNode(rootfs string, node *devices.Device, bind bool) error {
|
|
if dest == rootfs {
|
|
return fmt.Errorf("%w: mknod over rootfs", errRootfsToFile)
|
|
}
|
|
- if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
|
|
+ if err := utils.MkdirAllInRoot(rootfs, filepath.Dir(dest), 0o755); err != nil {
|
|
return err
|
|
}
|
|
if bind {
|
|
diff --git a/libcontainer/system/linux.go b/libcontainer/system/linux.go
|
|
index e1d6eb1..0f97045 100644
|
|
--- a/libcontainer/system/linux.go
|
|
+++ b/libcontainer/system/linux.go
|
|
@@ -6,6 +6,8 @@ package system
|
|
import (
|
|
"os"
|
|
"os/exec"
|
|
+ "runtime"
|
|
+ "strings"
|
|
"unsafe"
|
|
|
|
"golang.org/x/sys/unix"
|
|
@@ -102,3 +104,42 @@ func GetSubreaper() (int, error) {
|
|
|
|
return int(i), nil
|
|
}
|
|
+
|
|
+func prepareAt(dir *os.File, path string) (int, string) {
|
|
+ if dir == nil {
|
|
+ return unix.AT_FDCWD, path
|
|
+ }
|
|
+
|
|
+ // Rather than just filepath.Join-ing path here, do it manually so the
|
|
+ // error and handle correctly indicate cases like path=".." as being
|
|
+ // relative to the correct directory. The handle.Name() might end up being
|
|
+ // wrong but because this is (currently) only used in MkdirAllInRoot, that
|
|
+ // isn't a problem.
|
|
+ dirName := dir.Name()
|
|
+ if !strings.HasSuffix(dirName, "/") {
|
|
+ dirName += "/"
|
|
+ }
|
|
+ fullPath := dirName + path
|
|
+
|
|
+ return int(dir.Fd()), fullPath
|
|
+}
|
|
+
|
|
+func Openat(dir *os.File, path string, flags int, mode uint32) (*os.File, error) {
|
|
+ dirFd, fullPath := prepareAt(dir, path)
|
|
+ fd, err := unix.Openat(dirFd, path, flags, mode)
|
|
+ if err != nil {
|
|
+ return nil, &os.PathError{Op: "openat", Path: fullPath, Err: err}
|
|
+ }
|
|
+ runtime.KeepAlive(dir)
|
|
+ return os.NewFile(uintptr(fd), fullPath), nil
|
|
+}
|
|
+
|
|
+func Mkdirat(dir *os.File, path string, mode uint32) error {
|
|
+ dirFd, fullPath := prepareAt(dir, path)
|
|
+ err := unix.Mkdirat(dirFd, path, mode)
|
|
+ if err != nil {
|
|
+ err = &os.PathError{Op: "mkdirat", Path: fullPath, Err: err}
|
|
+ }
|
|
+ runtime.KeepAlive(dir)
|
|
+ return err
|
|
+}
|
|
diff --git a/libcontainer/utils/utils_unix.go b/libcontainer/utils/utils_unix.go
|
|
index 6fe0096..66d12e5 100644
|
|
--- a/libcontainer/utils/utils_unix.go
|
|
+++ b/libcontainer/utils/utils_unix.go
|
|
@@ -4,6 +4,7 @@
|
|
package utils
|
|
|
|
import (
|
|
+ "errors"
|
|
"fmt"
|
|
"math"
|
|
"os"
|
|
@@ -14,6 +15,8 @@ import (
|
|
"sync"
|
|
_ "unsafe" // for go:linkname
|
|
|
|
+ "github.com/opencontainers/runc/libcontainer/system"
|
|
+
|
|
securejoin "github.com/cyphar/filepath-securejoin"
|
|
"github.com/sirupsen/logrus"
|
|
"golang.org/x/sys/unix"
|
|
@@ -171,6 +174,115 @@ func IsLexicallyInRoot(root, path string) bool {
|
|
return strings.HasPrefix(path, root)
|
|
}
|
|
|
|
+// MkdirAllInRootOpen attempts to make
|
|
+//
|
|
+// path, _ := securejoin.SecureJoin(root, unsafePath)
|
|
+// os.MkdirAll(path, mode)
|
|
+// os.Open(path)
|
|
+//
|
|
+// safer against attacks where components in the path are changed between
|
|
+// SecureJoin returning and MkdirAll (or Open) being called. In particular, we
|
|
+// try to detect any symlink components in the path while we are doing the
|
|
+// MkdirAll.
|
|
+//
|
|
+// NOTE: Unlike os.MkdirAll, mode is not Go's os.FileMode, it is the unix mode
|
|
+// (the suid/sgid/sticky bits are not the same as for os.FileMode).
|
|
+//
|
|
+// NOTE: If unsafePath is a subpath of root, we assume that you have already
|
|
+// called SecureJoin and so we use the provided path verbatim without resolving
|
|
+// any symlinks (this is done in a way that avoids symlink-exchange races).
|
|
+// This means that the path also must not contain ".." elements, otherwise an
|
|
+// error will occur.
|
|
+//
|
|
+// This is a somewhat less safe alternative to
|
|
+// <https://github.com/cyphar/filepath-securejoin/pull/13>, but it should
|
|
+// detect attempts to trick us into creating directories outside of the root.
|
|
+// We should migrate to securejoin.MkdirAll once it is merged.
|
|
+func MkdirAllInRootOpen(root, unsafePath string, mode uint32) (_ *os.File, Err error) {
|
|
+ // If the path is already "within" the root, use it verbatim.
|
|
+ fullPath := unsafePath
|
|
+ if !IsLexicallyInRoot(root, unsafePath) {
|
|
+ var err error
|
|
+ fullPath, err = securejoin.SecureJoin(root, unsafePath)
|
|
+ if err != nil {
|
|
+ return nil, err
|
|
+ }
|
|
+ }
|
|
+ subPath, err := filepath.Rel(root, fullPath)
|
|
+ if err != nil {
|
|
+ return nil, err
|
|
+ }
|
|
+
|
|
+ // Check for any silly mode bits.
|
|
+ if mode&^0o7777 != 0 {
|
|
+ return nil, fmt.Errorf("tried to include non-mode bits in MkdirAll mode: 0o%.3o", mode)
|
|
+ }
|
|
+
|
|
+ currentDir, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
|
|
+ if err != nil {
|
|
+ return nil, fmt.Errorf("open root handle: %w", err)
|
|
+ }
|
|
+ defer func() {
|
|
+ if Err != nil {
|
|
+ currentDir.Close()
|
|
+ }
|
|
+ }()
|
|
+
|
|
+ for _, part := range strings.Split(subPath, string(filepath.Separator)) {
|
|
+ switch part {
|
|
+ case "", ".":
|
|
+ // Skip over no-op components.
|
|
+ continue
|
|
+ case "..":
|
|
+ return nil, fmt.Errorf("possible breakout detected: found %q component in SecureJoin subpath %s", part, subPath)
|
|
+ }
|
|
+
|
|
+ nextDir, err := system.Openat(currentDir, part, unix.O_DIRECTORY|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
|
|
+ switch {
|
|
+ case err == nil:
|
|
+ // Update the currentDir.
|
|
+ _ = currentDir.Close()
|
|
+ currentDir = nextDir
|
|
+
|
|
+ case errors.Is(err, unix.ENOTDIR):
|
|
+ // This might be a symlink or some other random file. Either way,
|
|
+ // error out.
|
|
+ return nil, fmt.Errorf("cannot mkdir in %s/%s: %w", currentDir.Name(), part, unix.ENOTDIR)
|
|
+
|
|
+ case errors.Is(err, os.ErrNotExist):
|
|
+ // Luckily, mkdirat will not follow trailing symlinks, so this is
|
|
+ // safe to do as-is.
|
|
+ if err := system.Mkdirat(currentDir, part, mode); err != nil {
|
|
+ return nil, err
|
|
+ }
|
|
+ // Open the new directory. There is a race here where an attacker
|
|
+ // could swap the directory with a different directory, but
|
|
+ // MkdirAll's fuzzy semantics mean we don't care about that.
|
|
+ nextDir, err := system.Openat(currentDir, part, unix.O_DIRECTORY|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
|
|
+ if err != nil {
|
|
+ return nil, fmt.Errorf("open newly created directory: %w", err)
|
|
+ }
|
|
+ // Update the currentDir.
|
|
+ _ = currentDir.Close()
|
|
+ currentDir = nextDir
|
|
+
|
|
+ default:
|
|
+ return nil, err
|
|
+ }
|
|
+ }
|
|
+ return currentDir, nil
|
|
+}
|
|
+
|
|
+// MkdirAllInRoot is a wrapper around MkdirAllInRootOpen which closes the
|
|
+// returned handle, for callers that don't need to use it.
|
|
+func MkdirAllInRoot(root, unsafePath string, mode uint32) error {
|
|
+ f, err := MkdirAllInRootOpen(root, unsafePath, mode)
|
|
+ if err == nil {
|
|
+ _ = f.Close()
|
|
+ }
|
|
+ return err
|
|
+}
|
|
+
|
|
// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...)
|
|
// corresponding to the unsafePath resolved within the root. Before passing the
|
|
// fd, this path is verified to have been inside the root -- so operating on it
|
|
--
|
|
2.33.0
|
|
|