lxc/0021-cgroup-add-retry-for-destory-cgroups.patch

388 lines
10 KiB
Diff
Raw Normal View History

From 4814d02fd3d364b599707b3cb298a8cc945033f9 Mon Sep 17 00:00:00 2001
From: LiFeng <lifeng68@huawei.com>
Date: Tue, 14 Apr 2020 17:07:24 +0800
Subject: [PATCH 21/49] cgroup: add retry for destroy cgroups
Signed-off-by: LiFeng <lifeng68@huawei.com>
---
src/lxc/cgroups/cgfsng.c | 105 +++++++++++++++++++++++++
src/lxc/cgroups/cgroup.h | 4 +
src/lxc/start.c | 195 ++++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 303 insertions(+), 1 deletion(-)
diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index 881dd39..00270ab 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -1000,6 +1000,106 @@ static int cgroup_tree_remove_wrapper(void *data)
return cgroup_tree_remove(arg->hierarchies, arg->container_cgroup);
}
+#ifdef HAVE_ISULAD
+
+/* Remove every hierarchy's container cgroup directory. Returns 0 on success or
+ * when there is nothing to do, -1 as soon as one lxc_rm_rf() call fails. */
+static int isulad_cgroup_tree_remove(struct hierarchy **hierarchies,
+				     const char *container_cgroup)
+{
+	struct hierarchy **it;
+
+	if (!hierarchies || !container_cgroup)
+		return 0;
+
+	for (it = hierarchies; *it; it++) {
+		struct hierarchy *cur = *it;
+
+		if (cur->container_full_path == NULL)
+			continue;
+
+		if (lxc_rm_rf(cur->container_full_path) < 0) {
+			SYSERROR("Failed to destroy \"%s\"", cur->container_full_path);
+			return -1;
+		}
+		free_disarm(cur->container_full_path);
+	}
+	return 0;
+}
+
+/* userns_exec_1() callback: clear supplementary groups, switch to gid/uid 0 when a root
+ * mapping exists (else init_gid/init_uid), then remove the cgroups. Returns 0 or -1. */
+static int isulad_cgroup_tree_remove_wrapper(void *data)
+{
+	struct generic_userns_exec_data *wrap = data;
+	uid_t uid = wrap->conf->root_nsuid_map ? 0 : wrap->conf->init_uid;
+	gid_t gid = wrap->conf->root_nsgid_map ? 0 : wrap->conf->init_gid;
+
+	/* A falsy lxc_setgroups() result is tolerated only when errno is EPERM. */
+	if (!lxc_setgroups(0, NULL) && errno != EPERM)
+		return log_error_errno(-1, errno, "Failed to setgroups(0, NULL)");
+
+	if (setresgid(gid, gid, gid) < 0)
+		return log_error_errno(-1, errno, "Failed to setresgid(%d, %d, %d)",
+				       (int)gid, (int)gid, (int)gid);
+
+	if (setresuid(uid, uid, uid) < 0)
+		return log_error_errno(-1, errno, "Failed to setresuid(%d, %d, %d)",
+				       (int)uid, (int)uid, (int)uid);
+
+	return isulad_cgroup_tree_remove(wrap->hierarchies, wrap->container_cgroup);
+}
+
+/* iSulad variant of payload_destroy: returns whether the container cgroups were
+ * removed (true) so that the caller can retry; bad arguments also yield false. */
+__cgfsng_ops static bool isulad_cgfsng_payload_destroy(struct cgroup_ops *ops,
+							struct lxc_handler *handler)
+{
+	int rc;
+
+	if (ops == NULL) {
+		ERROR("Called with uninitialized cgroup operations");
+		return false;
+	}
+
+	/* NOTE(review): missing hierarchies count as failure, not as "nothing to do". */
+	if (ops->hierarchies == NULL)
+		return false;
+
+	if (handler == NULL) {
+		ERROR("Called with uninitialized handler");
+		return false;
+	}
+
+	if (handler->conf == NULL) {
+		ERROR("Called with uninitialized conf");
+		return false;
+	}
+
+#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
+	if (bpf_program_cgroup_detach(handler->conf->cgroup2_devices) < 0)
+		WARN("Failed to detach bpf program from cgroup");
+#endif
+
+	if (lxc_list_empty(&handler->conf->id_map)) {
+		rc = isulad_cgroup_tree_remove(ops->hierarchies, ops->container_cgroup);
+	} else {
+		struct generic_userns_exec_data wrap = {
+			.conf = handler->conf,
+			.container_cgroup = ops->container_cgroup,
+			.hierarchies = ops->hierarchies,
+			.origuid = 0,
+		};
+		rc = userns_exec_1(handler->conf, isulad_cgroup_tree_remove_wrapper,
+				   &wrap, "cgroup_tree_remove_wrapper");
+	}
+	if (rc < 0) {
+		SYSWARN("Failed to destroy cgroups");
+		return false;
+	}
+	return true;
+}
+#else
__cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
struct lxc_handler *handler)
{
@@ -1044,6 +1144,7 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
if (ret < 0)
SYSWARN("Failed to destroy cgroups");
}
+#endif
#ifdef HAVE_ISULAD
__cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
@@ -4107,7 +4208,11 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
return NULL;
cgfsng_ops->data_init = cgfsng_data_init;
+#ifdef HAVE_ISULAD
+ cgfsng_ops->payload_destroy = isulad_cgfsng_payload_destroy;
+#else
cgfsng_ops->payload_destroy = cgfsng_payload_destroy;
+#endif
cgfsng_ops->monitor_destroy = cgfsng_monitor_destroy;
cgfsng_ops->monitor_create = cgfsng_monitor_create;
cgfsng_ops->monitor_enter = cgfsng_monitor_enter;
diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
index dcdc76b..a9048c4 100644
--- a/src/lxc/cgroups/cgroup.h
+++ b/src/lxc/cgroups/cgroup.h
@@ -144,7 +144,11 @@ struct cgroup_ops {
cgroup_layout_t cgroup_layout;
int (*data_init)(struct cgroup_ops *ops, struct lxc_conf *conf);
+#ifdef HAVE_ISULAD
+ bool (*payload_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
+#else
void (*payload_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
+#endif
void (*monitor_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
bool (*monitor_create)(struct cgroup_ops *ops, struct lxc_handler *handler);
bool (*monitor_enter)(struct cgroup_ops *ops, struct lxc_handler *handler);
diff --git a/src/lxc/start.c b/src/lxc/start.c
index 800f884..0942c31 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -879,6 +879,170 @@ out_restore_sigmask:
return -1;
}
+#ifdef HAVE_ISULAD
+/* Strip trailing '\n' characters from s in place, always keeping the first byte. */
+void trim_line(char *s)
+{
+	char *end = s + strlen(s);
+
+	for (; end - s > 1 && end[-1] == '\n'; end--)
+		end[-1] = '\0';
+}
+
+/* Append the pids listed in the cgroup.procs file at path to *pids / *len.
+ * Returns 0, or -1 on open failure or OOM (then *pids is freed and *len reset). */
+static int _read_procs_file(const char *path, pid_t **pids, size_t *len)
+{
+	char *line = NULL;
+	size_t sz = 0;
+	int ret = 0;
+	FILE *f = fopen_cloexec(path, "r");
+
+	if (!f)
+		return -1;
+
+	while (getline(&line, &sz, f) != -1) {
+		pid_t *grown = NULL;
+		pid_t pid = (pid_t)atoll(line); /* atoll() stops at the trailing newline */
+
+		if (pid <= 0) /* never hand 0 or a negative "pid" on to kill() */
+			continue;
+		if (lxc_mem_realloc((void **)&grown, sizeof(pid_t) * (*len + 1), *pids, sizeof(pid_t) * (*len)) != 0) {
+			ERROR("out of memory");
+			free(*pids);
+			*pids = NULL;
+			*len = 0; /* keep the count in sync with the freed array */
+			ret = -1;
+			break;
+		}
+		grown[(*len)++] = pid;
+		*pids = grown;
+	}
+	free(line);
+	fclose(f);
+	return ret;
+}
+
+/* Recursively walk dirpath and append the pids of every cgroup.procs file found
+ * to *pids / *len. A directory that cannot be opened is logged and treated as
+ * empty (returns 0). Failures on individual entries do not stop the walk but
+ * make the function return -1 once the directory has been processed. */
+static int _recursive_read_cgroup_procs(const char *dirpath, pid_t **pids, size_t *len)
+{
+	char child[PATH_MAX];
+	struct dirent *ent;
+	int result = 0;
+	DIR *dp = opendir(dirpath);
+
+	if (!dp) {
+		WARN("Failed to open \"%s\"", dirpath);
+		return 0;
+	}
+
+	while ((ent = readdir(dp)) != NULL) {
+		const char *name = ent->d_name;
+		struct stat sb;
+		int n;
+
+		if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
+			continue;
+
+		/* Entries whose full path does not fit in PATH_MAX are skipped. */
+		n = snprintf(child, sizeof(child), "%s/%s", dirpath, name);
+		if (n < 0 || n >= PATH_MAX) {
+			result = -1;
+			continue;
+		}
+
+		if (strcmp(name, "cgroup.procs") == 0) {
+			if (_read_procs_file(child, pids, len) != 0)
+				result = -1;
+			continue;
+		}
+
+		/* lstat() does not follow symlinks, so only real directories recurse. */
+		if (lstat(child, &sb) != 0) {
+			result = -1;
+			continue;
+		}
+
+		if (S_ISDIR(sb.st_mode) &&
+		    _recursive_read_cgroup_procs(child, pids, len) < 0)
+			result = -1;
+	}
+
+	if (closedir(dp) != 0) {
+		WARN("Failed to close directory \"%s\"", dirpath);
+		result = -1;
+	}
+
+	return result;
+}
+
+/* Collect every pid under the container's "devices" cgroup into *pids / *len.
+ * Returns 0 if that cgroup path is unknown or missing, -1 if the walk failed. */
+int get_all_pids(struct cgroup_ops *cg_ops, pid_t **pids, size_t *len)
+{
+	const char *devices_path = cg_ops->get_cgroup_full_path(cg_ops, "devices");
+
+	/* Check for NULL first so file_exists() is never handed a NULL path. */
+	if (!devices_path || !file_exists(devices_path))
+		return 0;
+	return _recursive_read_cgroup_procs(devices_path, pids, len);
+}
+
+/* Write value to the container's freezer.state; -1 if no freezer path is known. */
+static int set_cgroup_freezer(struct cgroup_ops *cg_ops, const char *value)
+{
+	const char *dir = cg_ops->get_cgroup_full_path(cg_ops, "freezer");
+	char *fullpath = dir ? must_make_path(dir, "freezer.state", NULL) : NULL;
+	int ret = fullpath ? lxc_write_to_file(fullpath, value, strlen(value), false, 0666) : -1;
+
+	free(fullpath);
+	return ret;
+}
+
+/* isulad: SIGKILL every process found in the container's cgroup and wait for it.
+ * The cgroup is frozen around the kill loop and thawed before waiting; freezer
+ * errors with errno == ENOENT are ignored, all other failures are only logged.
+ * Non-positive pids are skipped in both loops and a NULL pid array is never indexed. */
+static void signal_all_processes(struct lxc_handler *handler)
+{
+	struct cgroup_ops *cg_ops = handler->cgroup_ops;
+	pid_t *pids = NULL;
+	size_t len = 0, i;
+
+	if (set_cgroup_freezer(cg_ops, "FROZEN") < 0 && errno != ENOENT)
+		WARN("cgroup_set frozen failed");
+
+	if (get_all_pids(cg_ops, &pids, &len) < 0)
+		WARN("failed to get all pids");
+	if (!pids)
+		len = 0; /* a failed scan may have freed the array */
+
+	for (i = 0; i < len; i++) {
+		/* kill() treats pid 0 and negative pids as process-group selectors. */
+		if (pids[i] <= 0)
+			continue;
+		if (kill(pids[i], SIGKILL) < 0 && errno != ESRCH)
+			WARN("Can not kill process (pid=%d) with SIGKILL for container %s", pids[i], handler->name);
+	}
+
+	if (set_cgroup_freezer(cg_ops, "THAWED") < 0 && errno != ENOENT)
+		WARN("cgroup_set thawed failed");
+
+	for (i = 0; i < len; i++) {
+		if (pids[i] <= 0)
+			continue;
+		/* ECHILD is not reported; presumably the pid was simply not our child. */
+		if (lxc_wait_for_pid_status(pids[i]) < 0 && errno != ECHILD)
+			WARN("Failed to wait pid %d for container %s: %s", pids[i], handler->name, strerror(errno));
+	}
+
+	free(pids);
+}
+#endif
+
void lxc_end(struct lxc_handler *handler)
{
int ret;
@@ -952,11 +1116,37 @@ void lxc_end(struct lxc_handler *handler)
lsm_process_cleanup(handler->conf, handler->lxcpath);
+#ifdef HAVE_ISULAD
+ // close maincmd fd before destroy cgroup for isulad
+ if (handler->conf->reboot == REBOOT_NONE) {
+ /* For all new state clients simply close the command socket.
+ * This will inform all state clients that the container is
+ * STOPPED and also prevents a race between a open()/close() on
+ * the command socket causing a new process to get ECONNREFUSED
+ * because we haven't yet closed the command socket.
+ */
+ close_prot_errno_disarm(handler->conf->maincmd_fd);
+ TRACE("Closed command socket");
+ }
+ int retry_count = 0;
+ int max_retry = 10;
+retry:
+ if (cgroup_ops != NULL && !cgroup_ops->payload_destroy(cgroup_ops, handler)) {
+ TRACE("Trying to kill all subprocess");
+ signal_all_processes(handler);
+ TRACE("Finished kill all subprocess");
+ if (retry_count < max_retry) {
+ usleep(100 * 1000); /* 100 millisecond */
+ retry_count++;
+ goto retry;
+ }
+ SYSERROR("Failed to destroy cgroup path for container: \"%s\"", handler->name);
+ }
+#else
if (cgroup_ops) {
cgroup_ops->payload_destroy(cgroup_ops, handler);
cgroup_ops->monitor_destroy(cgroup_ops, handler);
}
-
if (handler->conf->reboot == REBOOT_NONE) {
/* For all new state clients simply close the command socket.
* This will inform all state clients that the container is
@@ -966,7 +1156,10 @@ void lxc_end(struct lxc_handler *handler)
*/
close_prot_errno_disarm(handler->conf->maincmd_fd);
TRACE("Closed command socket");
+ }
+#endif
+ if (handler->conf->reboot == REBOOT_NONE) {
/* This function will try to connect to the legacy lxc-monitord
* state server and only exists for backwards compatibility.
*/
--
1.8.3.1