From 4814d02fd3d364b599707b3cb298a8cc945033f9 Mon Sep 17 00:00:00 2001
From: LiFeng
Date: Tue, 14 Apr 2020 17:07:24 +0800
Subject: [PATCH 21/49] cgroup: add retry for destroy cgroups

Signed-off-by: LiFeng
---
Review notes (text between the "---" marker and the diffstat is ignored
by git am and is not part of the commit message):
 * _read_procs_file(): skip non-positive pids so that kill(0, SIGKILL) or
   kill(-1, SIGKILL) can never be issued, and reset *len when the pid
   array is freed on allocation failure.
 * set_cgroup_freezer() / get_all_pids(): tolerate a NULL cgroup path.
 * Subject typo fixed. Hunk offsets and diffstat were recomputed by hand;
   the start.c post-image blob id on its "index" line was not regenerated.

 src/lxc/cgroups/cgfsng.c | 105 +++++++++++++++++++++++
 src/lxc/cgroups/cgroup.h |   4 +
 src/lxc/start.c          | 216 ++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 324 insertions(+), 1 deletion(-)

diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index 881dd39..00270ab 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -1000,6 +1000,106 @@ static int cgroup_tree_remove_wrapper(void *data)
 	return cgroup_tree_remove(arg->hierarchies, arg->container_cgroup);
 }
 
+#ifdef HAVE_ISULAD
+
+static int isulad_cgroup_tree_remove(struct hierarchy **hierarchies,
+				     const char *container_cgroup)
+{
+	if (!container_cgroup || !hierarchies)
+		return 0;
+
+	for (int i = 0; hierarchies[i]; i++) {
+		struct hierarchy *h = hierarchies[i];
+		int ret;
+
+		if (!h->container_full_path)
+			continue;
+
+		ret = lxc_rm_rf(h->container_full_path);
+		if (ret < 0) {
+			SYSERROR("Failed to destroy \"%s\"", h->container_full_path);
+			return -1;
+		}
+
+		free_disarm(h->container_full_path);
+	}
+
+	return 0;
+}
+
+static int isulad_cgroup_tree_remove_wrapper(void *data)
+{
+	struct generic_userns_exec_data *arg = data;
+	uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid;
+	gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 0 : arg->conf->init_gid;
+	int ret;
+
+	if (!lxc_setgroups(0, NULL) && errno != EPERM)
+		return log_error_errno(-1, errno, "Failed to setgroups(0, NULL)");
+
+	ret = setresgid(nsgid, nsgid, nsgid);
+	if (ret < 0)
+		return log_error_errno(-1, errno, "Failed to setresgid(%d, %d, %d)",
+				       (int)nsgid, (int)nsgid, (int)nsgid);
+
+	ret = setresuid(nsuid, nsuid, nsuid);
+	if (ret < 0)
+		return log_error_errno(-1, errno, "Failed to setresuid(%d, %d, %d)",
+				       (int)nsuid, (int)nsuid, (int)nsuid);
+
+	return isulad_cgroup_tree_remove(arg->hierarchies, arg->container_cgroup);
+}
+
+__cgfsng_ops static bool isulad_cgfsng_payload_destroy(struct cgroup_ops *ops,
+						       struct lxc_handler *handler)
+{
+	int ret;
+
+	if (!ops) {
+		ERROR("Called with uninitialized cgroup operations");
+		return false;
+	}
+
+	if (!ops->hierarchies) {
+		return false;
+	}
+
+	if (!handler) {
+		ERROR("Called with uninitialized handler");
+		return false;
+	}
+
+	if (!handler->conf) {
+		ERROR("Called with uninitialized conf");
+		return false;
+	}
+
+#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
+	ret = bpf_program_cgroup_detach(handler->conf->cgroup2_devices);
+	if (ret < 0)
+		WARN("Failed to detach bpf program from cgroup");
+#endif
+
+	if (handler->conf && !lxc_list_empty(&handler->conf->id_map)) {
+		struct generic_userns_exec_data wrap = {
+			.conf = handler->conf,
+			.container_cgroup = ops->container_cgroup,
+			.hierarchies = ops->hierarchies,
+			.origuid = 0,
+		};
+		ret = userns_exec_1(handler->conf, isulad_cgroup_tree_remove_wrapper,
+				    &wrap, "cgroup_tree_remove_wrapper");
+	} else {
+		ret = isulad_cgroup_tree_remove(ops->hierarchies, ops->container_cgroup);
+	}
+	if (ret < 0) {
+		SYSWARN("Failed to destroy cgroups");
+		return false;
+	}
+
+	return true;
+}
+#else
 __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
 						struct lxc_handler *handler)
 {
@@ -1044,6 +1144,7 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
 	if (ret < 0)
 		SYSWARN("Failed to destroy cgroups");
 }
+#endif
 
 #ifdef HAVE_ISULAD
 __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
@@ -4107,7 +4208,11 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
 		return NULL;
 
 	cgfsng_ops->data_init = cgfsng_data_init;
+#ifdef HAVE_ISULAD
+	cgfsng_ops->payload_destroy = isulad_cgfsng_payload_destroy;
+#else
 	cgfsng_ops->payload_destroy = cgfsng_payload_destroy;
+#endif
 	cgfsng_ops->monitor_destroy = cgfsng_monitor_destroy;
 	cgfsng_ops->monitor_create = cgfsng_monitor_create;
 	cgfsng_ops->monitor_enter = cgfsng_monitor_enter;
diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
index dcdc76b..a9048c4 100644
--- a/src/lxc/cgroups/cgroup.h
+++ b/src/lxc/cgroups/cgroup.h
@@ -144,7 +144,11 @@ struct cgroup_ops {
 	cgroup_layout_t cgroup_layout;
 
 	int (*data_init)(struct cgroup_ops *ops, struct lxc_conf *conf);
+#ifdef HAVE_ISULAD
+	bool (*payload_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
+#else
 	void (*payload_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
+#endif
 	void (*monitor_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
 	bool (*monitor_create)(struct cgroup_ops *ops, struct lxc_handler *handler);
 	bool (*monitor_enter)(struct cgroup_ops *ops, struct lxc_handler *handler);
diff --git a/src/lxc/start.c b/src/lxc/start.c
index 800f884..0942c31 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -879,6 +879,191 @@ out_restore_sigmask:
 	return -1;
 }
 
+#ifdef HAVE_ISULAD
+void trim_line(char *s)
+{
+	size_t len;
+
+	len = strlen(s);
+	while ((len > 1) && (s[len - 1] == '\n'))
+		s[--len] = '\0';
+}
+
+/* Append every positive pid listed in the file at path to *pids / *len. */
+static int _read_procs_file(const char *path, pid_t **pids, size_t *len)
+{
+	FILE *f;
+	char *line = NULL;
+	size_t sz = 0;
+	pid_t *tmp_pids = NULL;
+
+	f = fopen_cloexec(path, "r");
+	if (!f)
+		return -1;
+
+	while (getline(&line, &sz, f) != -1) {
+		pid_t pid;
+		trim_line(line);
+		pid = (pid_t)atoll(line);
+		/* atoll() returns 0 on unparsable input, and kill() treats
+		 * pid 0 / negative pids as process-group targets: never
+		 * record a non-positive pid.
+		 */
+		if (pid <= 0)
+			continue;
+		if (lxc_mem_realloc((void **)&tmp_pids, sizeof(pid_t) * (*len + 1), *pids, sizeof(pid_t) * (*len)) != 0) {
+			free(*pids);
+			*pids = NULL;
+			/* keep the count consistent with the freed array */
+			*len = 0;
+			ERROR("out of memory");
+			free(line);
+			fclose(f);
+			return -1;
+		}
+		*pids = tmp_pids;
+
+		(*pids)[*len] = pid;
+		(*len)++;
+	}
+
+	free(line);
+	fclose(f);
+	return 0;
+}
+
+static int _recursive_read_cgroup_procs(const char *dirpath, pid_t **pids, size_t *len)
+{
+	struct dirent *direntp = NULL;
+	DIR *dir = NULL;
+	int ret, failed = 0;
+	char pathname[PATH_MAX];
+
+	dir = opendir(dirpath);
+	if (dir == NULL) {
+		WARN("Failed to open \"%s\"", dirpath);
+		return 0;
+	}
+
+	while ((direntp = readdir(dir))) {
+		struct stat mystat;
+		int rc;
+
+		if (!strcmp(direntp->d_name, ".") ||
+		    !strcmp(direntp->d_name, ".."))
+			continue;
+
+		rc = snprintf(pathname, PATH_MAX, "%s/%s", dirpath, direntp->d_name);
+		if (rc < 0 || rc >= PATH_MAX) {
+			failed = 1;
+			continue;
+		}
+
+		if (strcmp(direntp->d_name, "cgroup.procs") == 0) {
+			if (_read_procs_file(pathname, pids, len)) {
+				failed = 1;
+
+			}
+			continue;
+		}
+
+		ret = lstat(pathname, &mystat);
+		if (ret) {
+			failed = 1;
+			continue;
+		}
+
+		if (S_ISDIR(mystat.st_mode)) {
+			if (_recursive_read_cgroup_procs(pathname, pids, len) < 0)
+				failed = 1;
+		}
+	}
+
+	ret = closedir(dir);
+	if (ret) {
+		WARN("Failed to close directory \"%s\"", dirpath);
+		failed = 1;
+	}
+
+	return failed ? -1 : 0;
+}
+
+int get_all_pids(struct cgroup_ops *cg_ops, pid_t **pids, size_t *len)
+{
+	const char *devices_path = NULL;
+
+	devices_path = cg_ops->get_cgroup_full_path(cg_ops, "devices");
+	if (!devices_path || !file_exists(devices_path)) {
+		return 0;
+	}
+
+	return _recursive_read_cgroup_procs(devices_path, pids, len);
+}
+
+/* Write value to freezer.state under the container's freezer cgroup path. */
+static int set_cgroup_freezer(struct cgroup_ops *cg_ops, const char *value)
+{
+	const char *freezer_path;
+	char *fullpath;
+	int ret;
+
+	/* Never hand a NULL path to must_make_path(): report the missing
+	 * freezer cgroup as ENOENT, which signal_all_processes() already
+	 * tolerates.
+	 */
+	freezer_path = cg_ops->get_cgroup_full_path(cg_ops, "freezer");
+	if (!freezer_path) {
+		errno = ENOENT;
+		return -1;
+	}
+
+	fullpath = must_make_path(freezer_path, "freezer.state", NULL);
+	ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666);
+	free(fullpath);
+	return ret;
+}
+
+/* isulad: kill all process in container cgroup path */
+static void signal_all_processes(struct lxc_handler *handler)
+{
+	int ret;
+	struct cgroup_ops *cg_ops = handler->cgroup_ops;
+	pid_t *pids = NULL;
+	size_t len = 0, i;
+
+	ret = set_cgroup_freezer(cg_ops, "FROZEN");
+	if (ret < 0 && errno != ENOENT) {
+		WARN("cgroup_set frozen failed");
+	}
+
+	ret = get_all_pids(cg_ops, &pids, &len);
+	if (ret < 0) {
+		WARN("failed to get all pids");
+	}
+
+	for (i = 0; i < len; i++) {
+		ret = kill(pids[i], SIGKILL);
+		if (ret < 0 && errno != ESRCH) {
+			WARN("Can not kill process (pid=%d) with SIGKILL for container %s", pids[i], handler->name);
+		}
+	}
+
+	ret = set_cgroup_freezer(cg_ops, "THAWED");
+	if (ret < 0 && errno != ENOENT) {
+		WARN("cgroup_set thawed failed");
+	}
+
+	for (i = 0; i < len; i++) {
+		ret = lxc_wait_for_pid_status(pids[i]);
+		if (ret < 0 && errno != ECHILD) {
+			WARN("Failed to wait pid %d for container %s: %s", pids[i], handler->name, strerror(errno));
+		}
+	}
+
+	free(pids);
+}
+#endif
+
 void lxc_end(struct lxc_handler *handler)
 {
 	int ret;
@@ -952,11 +1137,37 @@ void lxc_end(struct lxc_handler *handler)
 
 	lsm_process_cleanup(handler->conf, handler->lxcpath);
 
+#ifdef HAVE_ISULAD
+	// close maincmd fd before destroy cgroup for isulad
+	if (handler->conf->reboot == REBOOT_NONE) {
+		/* For all new state clients simply close the command socket.
+		 * This will inform all state clients that the container is
+		 * STOPPED and also prevents a race between a open()/close() on
+		 * the command socket causing a new process to get ECONNREFUSED
+		 * because we haven't yet closed the command socket.
+		 */
+		close_prot_errno_disarm(handler->conf->maincmd_fd);
+		TRACE("Closed command socket");
+	}
+	int retry_count = 0;
+	int max_retry = 10;
+retry:
+	if (cgroup_ops != NULL && !cgroup_ops->payload_destroy(cgroup_ops, handler)) {
+		TRACE("Trying to kill all subprocess");
+		signal_all_processes(handler);
+		TRACE("Finished kill all subprocess");
+		if (retry_count < max_retry) {
+			usleep(100 * 1000); /* 100 millisecond */
+			retry_count++;
+			goto retry;
+		}
+		SYSERROR("Failed to destroy cgroup path for container: \"%s\"", handler->name);
+	}
+#else
 	if (cgroup_ops) {
 		cgroup_ops->payload_destroy(cgroup_ops, handler);
 		cgroup_ops->monitor_destroy(cgroup_ops, handler);
 	}
-
 	if (handler->conf->reboot == REBOOT_NONE) {
 		/* For all new state clients simply close the command socket.
 		 * This will inform all state clients that the container is
@@ -966,7 +1177,10 @@ void lxc_end(struct lxc_handler *handler)
 		 */
 		close_prot_errno_disarm(handler->conf->maincmd_fd);
 		TRACE("Closed command socket");
+	}
+#endif
 
+	if (handler->conf->reboot == REBOOT_NONE) {
 		/* This function will try to connect to the legacy lxc-monitord
 		 * state server and only exists for backwards compatibility.
 		 */
-- 
1.8.3.1