177 lines
6.4 KiB
Diff
177 lines
6.4 KiB
Diff
From a1f4fd387603673a79a84ca4e5ce25b439b85fe6 Mon Sep 17 00:00:00 2001
|
|
From: Yu Watanabe <watanabe.yu+github@gmail.com>
|
|
Date: Wed, 16 Mar 2022 20:46:49 +0900
|
|
Subject: [PATCH] udev: run the main process, workers, and spawned commands in
|
|
/udev subcgroup
|
|
|
|
And enable cgroup delegation for udevd.
|
|
Then, processes invoked through ExecReload= are assigned .control
|
|
subcgroup, and they are not killed by cg_kill().
|
|
|
|
Fixes #16867 and #22686.
|
|
---
|
|
src/udev/udevd.c | 76 ++++++++++++++++++++++++++--------
|
|
units/systemd-udevd.service.in | 1 +
|
|
2 files changed, 59 insertions(+), 18 deletions(-)
|
|
|
|
diff --git a/src/udev/udevd.c b/src/udev/udevd.c
|
|
index 8380d674c5..c6f6d945c8 100644
|
|
--- a/src/udev/udevd.c
|
|
+++ b/src/udev/udevd.c
|
|
@@ -28,6 +28,7 @@
|
|
#include "sd-event.h"
|
|
|
|
#include "alloc-util.h"
|
|
+#include "cgroup-setup.h"
|
|
#include "cgroup-util.h"
|
|
#include "cpu-set-util.h"
|
|
#include "dev-setup.h"
|
|
@@ -48,6 +49,7 @@
|
|
#include "mkdir.h"
|
|
#include "netlink-util.h"
|
|
#include "parse-util.h"
|
|
+#include "path-util.h"
|
|
#include "pretty-print.h"
|
|
#include "proc-cmdline.h"
|
|
#include "process-util.h"
|
|
@@ -85,7 +87,7 @@ typedef struct Manager {
|
|
sd_event *event;
|
|
Hashmap *workers;
|
|
LIST_HEAD(Event, events);
|
|
- const char *cgroup;
|
|
+ char *cgroup;
|
|
pid_t pid; /* the process that originally allocated the manager object */
|
|
int log_level;
|
|
|
|
@@ -238,6 +240,7 @@ static Manager* manager_free(Manager *manager) {
|
|
safe_close(manager->inotify_fd);
|
|
safe_close_pair(manager->worker_watch);
|
|
|
|
+ free(manager->cgroup);
|
|
return mfree(manager);
|
|
}
|
|
|
|
@@ -1722,12 +1725,63 @@ static int parse_argv(int argc, char *argv[]) {
|
|
return 1;
|
|
}
|
|
|
|
-static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent, const char *cgroup) {
|
|
+static int create_subcgroup(char **ret) {
|
|
+ _cleanup_free_ char *cgroup = NULL, *subcgroup = NULL;
|
|
+ int r;
|
|
+
|
|
+ if (getppid() != 1)
|
|
+ return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Not invoked by PID1.");
|
|
+
|
|
+ r = sd_booted();
|
|
+ if (r < 0)
|
|
+ return log_debug_errno(r, "Failed to check if systemd is running: %m");
|
|
+ if (r == 0)
|
|
+ return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "systemd is not running.");
|
|
+
|
|
+ /* Get our own cgroup, we regularly kill everything udev has left behind.
|
|
+ * We only do this on systemd systems, and only if we are directly spawned
|
|
+ * by PID1. Otherwise we are not guaranteed to have a dedicated cgroup. */
|
|
+
|
|
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
|
|
+ if (r < 0) {
|
|
+ if (IN_SET(r, -ENOENT, -ENOMEDIUM))
|
|
+ return log_debug_errno(r, "Dedicated cgroup not found: %m");
|
|
+ return log_debug_errno(r, "Failed to get cgroup: %m");
|
|
+ }
|
|
+
|
|
+ r = cg_get_xattr_bool(SYSTEMD_CGROUP_CONTROLLER, cgroup, "trusted.delegate");
|
|
+ if (IN_SET(r, 0, -ENODATA))
|
|
+ return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "The cgroup %s is not delegated to us.", cgroup);
|
|
+ if (r < 0)
|
|
+ return log_debug_errno(r, "Failed to read trusted.delegate attribute: %m");
|
|
+
|
|
+ /* We are invoked with our own delegated cgroup tree, let's move us one level down, so that we
|
|
+ * don't collide with the "no processes in inner nodes" rule of cgroups, when the service
|
|
+ * manager invokes the ExecReload= job in the .control/ subcgroup. */
|
|
+
|
|
+ subcgroup = path_join(cgroup, "/udev");
|
|
+ if (!subcgroup)
|
|
+ return log_oom_debug();
|
|
+
|
|
+ r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup, 0);
|
|
+ if (r < 0)
|
|
+ return log_debug_errno(r, "Failed to create %s subcgroup: %m", subcgroup);
|
|
+
|
|
+ log_debug("Created %s subcgroup.", subcgroup);
|
|
+ if (ret)
|
|
+ *ret = TAKE_PTR(subcgroup);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent) {
|
|
_cleanup_(manager_freep) Manager *manager = NULL;
|
|
+ _cleanup_free_ char *cgroup = NULL;
|
|
int r;
|
|
|
|
assert(ret);
|
|
|
|
+ (void) create_subcgroup(&cgroup);
|
|
+
|
|
manager = new(Manager, 1);
|
|
if (!manager)
|
|
return log_oom();
|
|
@@ -1735,7 +1789,7 @@ static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent, const char *cg
|
|
*manager = (Manager) {
|
|
.inotify_fd = -1,
|
|
.worker_watch = { -1, -1 },
|
|
- .cgroup = cgroup,
|
|
+ .cgroup = TAKE_PTR(cgroup),
|
|
};
|
|
|
|
r = udev_ctrl_new_from_fd(&manager->ctrl, fd_ctrl);
|
|
@@ -1880,7 +1934,6 @@ static int main_loop(Manager *manager) {
|
|
}
|
|
|
|
int run_udevd(int argc, char *argv[]) {
|
|
- _cleanup_free_ char *cgroup = NULL;
|
|
_cleanup_(manager_freep) Manager *manager = NULL;
|
|
int fd_ctrl = -1, fd_uevent = -1;
|
|
int r;
|
|
@@ -1937,24 +1990,11 @@ int run_udevd(int argc, char *argv[]) {
|
|
if (r < 0 && r != -EEXIST)
|
|
return log_error_errno(r, "Failed to create /run/udev: %m");
|
|
|
|
- if (getppid() == 1 && sd_booted() > 0) {
|
|
- /* Get our own cgroup, we regularly kill everything udev has left behind.
|
|
- * We only do this on systemd systems, and only if we are directly spawned
|
|
- * by PID1. Otherwise we are not guaranteed to have a dedicated cgroup. */
|
|
- r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
|
|
- if (r < 0) {
|
|
- if (IN_SET(r, -ENOENT, -ENOMEDIUM))
|
|
- log_debug_errno(r, "Dedicated cgroup not found: %m");
|
|
- else
|
|
- log_warning_errno(r, "Failed to get cgroup: %m");
|
|
- }
|
|
- }
|
|
-
|
|
r = listen_fds(&fd_ctrl, &fd_uevent);
|
|
if (r < 0)
|
|
return log_error_errno(r, "Failed to listen on fds: %m");
|
|
|
|
- r = manager_new(&manager, fd_ctrl, fd_uevent, cgroup);
|
|
+ r = manager_new(&manager, fd_ctrl, fd_uevent);
|
|
if (r < 0)
|
|
return log_error_errno(r, "Failed to create manager: %m");
|
|
|
|
diff --git a/units/systemd-udevd.service.in b/units/systemd-udevd.service.in
|
|
index d042bfb0d3..9901198274 100644
|
|
--- a/units/systemd-udevd.service.in
|
|
+++ b/units/systemd-udevd.service.in
|
|
@@ -16,6 +16,7 @@ Before=sysinit.target
|
|
ConditionPathIsReadWrite=/sys
|
|
|
|
[Service]
|
|
+Delegate=pids
|
|
DeviceAllow=block-* rwm
|
|
DeviceAllow=char-* rwm
|
|
Type=notify
|
|
--
|
|
2.27.0
|
|
|