From 4814d02fd3d364b599707b3cb298a8cc945033f9 Mon Sep 17 00:00:00 2001
From: LiFeng <lifeng68@huawei.com>
Date: Tue, 14 Apr 2020 17:07:24 +0800
Subject: [PATCH 21/49] cgroup: add retry for destroying cgroups

Signed-off-by: LiFeng <lifeng68@huawei.com>
---
 src/lxc/cgroups/cgfsng.c | 105 +++++++++++++++++++++++++
 src/lxc/cgroups/cgroup.h |   4 +
 src/lxc/start.c          | 195 ++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 303 insertions(+), 1 deletion(-)

diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index 881dd39..00270ab 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -1000,6 +1000,106 @@ static int cgroup_tree_remove_wrapper(void *data)
 	return cgroup_tree_remove(arg->hierarchies, arg->container_cgroup);
 }
 
+#ifdef HAVE_ISULAD
+
+/* isulad: remove the container cgroup directory of each hierarchy; -1 on the first failure. */
+static int isulad_cgroup_tree_remove(struct hierarchy **hierarchies,
+			const char *container_cgroup)
+{
+	struct hierarchy **cur;
+
+	if (!hierarchies || !container_cgroup)
+		return 0;
+
+	for (cur = hierarchies; *cur; cur++) {
+		const char *path = (*cur)->container_full_path;
+
+		if (!path)
+			continue;
+
+		if (lxc_rm_rf(path) < 0) {
+			SYSERROR("Failed to destroy \"%s\"", path);
+			return -1;
+		}
+		free_disarm((*cur)->container_full_path);
+	}
+
+	return 0;
+}
+
+/* isulad: callback handed to userns_exec_1().
+ * Switches to uid/gid 0 when conf->root_nsuid_map / root_nsgid_map is set,
+ * otherwise to the init ids, and then removes the container cgroups. */
+static int isulad_cgroup_tree_remove_wrapper(void *data)
+{
+	struct generic_userns_exec_data *arg = data;
+	uid_t uid = arg->conf->root_nsuid_map ? 0 : arg->conf->init_uid;
+	gid_t gid = arg->conf->root_nsgid_map ? 0 : arg->conf->init_gid;
+
+	if (!lxc_setgroups(0, NULL) && errno != EPERM)
+		return log_error_errno(-1, errno, "Failed to setgroups(0, NULL)");
+
+	if (setresgid(gid, gid, gid) < 0)
+		return log_error_errno(-1, errno, "Failed to setresgid(%d, %d, %d)",
+				       (int)gid, (int)gid, (int)gid);
+
+	if (setresuid(uid, uid, uid) < 0)
+		return log_error_errno(-1, errno, "Failed to setresuid(%d, %d, %d)",
+				       (int)uid, (int)uid, (int)uid);
+
+	return isulad_cgroup_tree_remove(arg->hierarchies, arg->container_cgroup);
+}
+
+/* isulad: destroy the container cgroups; false means "not gone yet" so the caller may retry. */
+__cgfsng_ops static bool isulad_cgfsng_payload_destroy(struct cgroup_ops *ops,
+						struct lxc_handler *handler)
+{
+	int ret;
+
+	if (!ops) {
+		ERROR("Called with uninitialized cgroup operations");
+		return false;
+	}
+
+	if (!ops->hierarchies)
+		return true; /* nothing was set up, so there is nothing to destroy */
+
+	if (!handler) {
+		ERROR("Called with uninitialized handler");
+		return false;
+	}
+
+	if (!handler->conf) {
+		ERROR("Called with uninitialized conf");
+		return false;
+	}
+
+#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
+	ret = bpf_program_cgroup_detach(handler->conf->cgroup2_devices);
+	if (ret < 0)
+		WARN("Failed to detach bpf program from cgroup");
+#endif
+
+	if (!lxc_list_empty(&handler->conf->id_map)) {
+		struct generic_userns_exec_data wrap = {
+			.conf			= handler->conf,
+			.container_cgroup	= ops->container_cgroup,
+			.hierarchies		= ops->hierarchies,
+			.origuid		= 0,
+		};
+		ret = userns_exec_1(handler->conf, isulad_cgroup_tree_remove_wrapper,
+				    &wrap, "cgroup_tree_remove_wrapper");
+	} else {
+		ret = isulad_cgroup_tree_remove(ops->hierarchies, ops->container_cgroup);
+	}
+	if (ret < 0) {
+		SYSWARN("Failed to destroy cgroups");
+		return false;
+	}
+
+	return true;
+}
+#else
 __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
 						struct lxc_handler *handler)
 {
@@ -1044,6 +1144,7 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
 	if (ret < 0)
 		SYSWARN("Failed to destroy cgroups");
 }
+#endif
 
 #ifdef HAVE_ISULAD
 __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
@@ -4107,7 +4208,11 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
 		return NULL;
 
 	cgfsng_ops->data_init = cgfsng_data_init;
+#ifdef HAVE_ISULAD
+	cgfsng_ops->payload_destroy = isulad_cgfsng_payload_destroy;
+#else
 	cgfsng_ops->payload_destroy = cgfsng_payload_destroy;
+#endif
 	cgfsng_ops->monitor_destroy = cgfsng_monitor_destroy;
 	cgfsng_ops->monitor_create = cgfsng_monitor_create;
 	cgfsng_ops->monitor_enter = cgfsng_monitor_enter;
diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
index dcdc76b..a9048c4 100644
--- a/src/lxc/cgroups/cgroup.h
+++ b/src/lxc/cgroups/cgroup.h
@@ -144,7 +144,11 @@ struct cgroup_ops {
 	cgroup_layout_t cgroup_layout;
 
 	int (*data_init)(struct cgroup_ops *ops, struct lxc_conf *conf);
+#ifdef HAVE_ISULAD
+	bool (*payload_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
+#else
 	void (*payload_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
+#endif
 	void (*monitor_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
 	bool (*monitor_create)(struct cgroup_ops *ops, struct lxc_handler *handler);
 	bool (*monitor_enter)(struct cgroup_ops *ops, struct lxc_handler *handler);
diff --git a/src/lxc/start.c b/src/lxc/start.c
index 800f884..0942c31 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -879,6 +879,170 @@ out_restore_sigmask:
 	return -1;
 }
 
+#ifdef HAVE_ISULAD
+/* Strip trailing newlines in place; the first character is never removed. */
+void trim_line(char *s)
+{
+	size_t end = strlen(s);
+
+	for (; end > 1 && s[end - 1] == '\n'; end--)
+		s[end - 1] = '\0';
+}
+
+/* Append the pids listed in @path to *@pids (holding *@len entries); 0 on success, -1 on error. */
+static int _read_procs_file(const char *path, pid_t **pids, size_t *len)
+{
+	FILE *f = fopen_cloexec(path, "r");
+	pid_t *tmp_pids = NULL;
+	char *line = NULL;
+	size_t sz = 0;
+	int ret = 0;
+
+	if (!f)
+		return -1;
+
+	while (getline(&line, &sz, f) != -1) {
+		pid_t pid = (pid_t)atoll(line); /* atoll() stops at the trailing newline */
+
+		if (pid <= 0) /* kill() gives pid <= 0 a group/broadcast meaning; never record it */
+			continue;
+		if (lxc_mem_realloc((void **)&tmp_pids, sizeof(pid_t) * (*len + 1), *pids, sizeof(pid_t) * (*len)) != 0) {
+			ERROR("out of memory");
+			free(*pids);
+			*pids = NULL;
+			*len = 0; /* the count must not outlive the freed array */
+			ret = -1;
+			break;
+		}
+		*pids = tmp_pids;
+		(*pids)[(*len)++] = pid;
+	}
+
+	free(line);
+	fclose(f);
+	return ret;
+}
+
+/*
+ * Walk the cgroup directory @dirpath recursively and append the pids of every
+ * cgroup.procs file found to *@pids / *@len. A failing entry does not stop the
+ * walk. Returns -1 if any entry failed and 0 otherwise; a @dirpath that cannot
+ * be opened is only logged and also yields 0.
+ */
+static int _recursive_read_cgroup_procs(const char *dirpath, pid_t **pids, size_t *len)
+{
+	char pathname[PATH_MAX];
+	struct dirent *entry;
+	int failed = 0;
+	DIR *dir;
+
+	dir = opendir(dirpath);
+	if (!dir) {
+		WARN("Failed to open \"%s\"", dirpath);
+		return 0;
+	}
+
+	while ((entry = readdir(dir)) != NULL) {
+		const char *name = entry->d_name;
+		struct stat st;
+		int n;
+
+		if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
+			continue;
+
+		n = snprintf(pathname, PATH_MAX, "%s/%s", dirpath, name);
+		if (n < 0 || n >= PATH_MAX) {
+			failed = 1;
+			continue;
+		}
+
+		if (strcmp(name, "cgroup.procs") == 0) {
+			if (_read_procs_file(pathname, pids, len) != 0)
+				failed = 1;
+			continue;
+		}
+
+		if (lstat(pathname, &st) != 0) {
+			failed = 1;
+			continue;
+		}
+
+		if (S_ISDIR(st.st_mode) && _recursive_read_cgroup_procs(pathname, pids, len) < 0)
+			failed = 1;
+	}
+
+	if (closedir(dir) != 0) {
+		WARN("Failed to close directory \"%s\"", dirpath);
+		failed = 1;
+	}
+
+	return failed ? -1 : 0;
+}
+
+/* isulad: collect the pids found under the "devices" cgroup path into *@pids / *@len. */
+int get_all_pids(struct cgroup_ops *cg_ops, pid_t **pids, size_t *len)
+{
+	const char *devices_path = cg_ops->get_cgroup_full_path(cg_ops, "devices");
+
+	/* Nothing to collect when the path does not exist. */
+	if (file_exists(devices_path))
+		return _recursive_read_cgroup_procs(devices_path, pids, len);
+
+	return 0;
+}
+
+static int set_cgroup_freezer(struct cgroup_ops *cg_ops, const char *value)
+{
+	const char *dir = cg_ops->get_cgroup_full_path(cg_ops, "freezer"); /* NOTE(review): assumed NULL when there is no freezer path */
+	char *fullpath = dir ? must_make_path(dir, "freezer.state", NULL) : NULL;
+	int ret = fullpath ? lxc_write_to_file(fullpath, value, strlen(value), false, 0666) : -1;
+	if (!fullpath)
+		errno = ENOENT; /* no freezer path: callers skip their warning on ENOENT */
+	free(fullpath);
+	return ret;
+}
+
+/*
+ * isulad: kill all processes in the container cgroup path.
+ *
+ * The freezer cgroup is set to FROZEN while SIGKILL is sent to every collected
+ * pid and set back to THAWED afterwards, then each pid is waited for. Failures
+ * are logged with WARN only.
+ */
+static void signal_all_processes(struct lxc_handler *handler)
+{
+	struct cgroup_ops *cg_ops = handler->cgroup_ops;
+	pid_t *pids = NULL;
+	size_t len = 0, i;
+
+	if (set_cgroup_freezer(cg_ops, "FROZEN") < 0 && errno != ENOENT)
+		WARN("cgroup_set frozen failed");
+
+	if (get_all_pids(cg_ops, &pids, &len) < 0)
+		WARN("failed to get all pids");
+
+	/* A failed collection may have dropped the array; never index a NULL list. */
+	if (!pids)
+		len = 0;
+
+	/* ESRCH is not reported: kill() found no such process. */
+	for (i = 0; i < len; i++) {
+		if (kill(pids[i], SIGKILL) < 0 && errno != ESRCH)
+			WARN("Can not kill process (pid=%d) with SIGKILL for container %s", pids[i], handler->name);
+	}
+
+	if (set_cgroup_freezer(cg_ops, "THAWED") < 0 && errno != ENOENT)
+		WARN("cgroup_set thawed failed");
+
+	for (i = 0; i < len; i++) {
+		if (lxc_wait_for_pid_status(pids[i]) < 0 && errno != ECHILD)
+			WARN("Failed to wait pid %d for container %s: %s", pids[i], handler->name, strerror(errno));
+	}
+
+	free(pids);
+}
+#endif
+
 void lxc_end(struct lxc_handler *handler)
 {
 	int ret;
@@ -952,11 +1116,37 @@ void lxc_end(struct lxc_handler *handler)
 
 	lsm_process_cleanup(handler->conf, handler->lxcpath);
 
+#ifdef HAVE_ISULAD
+	// close maincmd fd before destroy cgroup for isulad
+	if (handler->conf->reboot == REBOOT_NONE) {
+		/* For all new state clients simply close the command socket.
+		 * This will inform all state clients that the container is
+		 * STOPPED and also prevents a race between a open()/close() on
+		 * the command socket causing a new process to get ECONNREFUSED
+		 * because we haven't yet closed the command socket.
+		 */
+		close_prot_errno_disarm(handler->conf->maincmd_fd);
+		TRACE("Closed command socket");
+	}
+	int retry_count = 0;
+	int max_retry = 10;
+retry:
+	if (cgroup_ops != NULL && !cgroup_ops->payload_destroy(cgroup_ops, handler)) {
+	        TRACE("Trying to kill all subprocess");
+	        signal_all_processes(handler);
+	        TRACE("Finished kill all subprocess");
+	        if (retry_count < max_retry) {
+				usleep(100 * 1000); /* 100 millisecond */
+				retry_count++;
+				goto retry;
+		}
+		SYSERROR("Failed to destroy cgroup path for container: \"%s\"", handler->name);
+	}
+#else
 	if (cgroup_ops) {
 		cgroup_ops->payload_destroy(cgroup_ops, handler);
 		cgroup_ops->monitor_destroy(cgroup_ops, handler);
 	}
-
 	if (handler->conf->reboot == REBOOT_NONE) {
 		/* For all new state clients simply close the command socket.
 		 * This will inform all state clients that the container is
@@ -966,7 +1156,10 @@ void lxc_end(struct lxc_handler *handler)
 		 */
 		close_prot_errno_disarm(handler->conf->maincmd_fd);
 		TRACE("Closed command socket");
+	}
+#endif
 
+	if (handler->conf->reboot == REBOOT_NONE) {
 		/* This function will try to connect to the legacy lxc-monitord
 		 * state server and only exists for backwards compatibility.
 		 */
-- 
1.8.3.1