From 41bc2b03f1c143352b025432a955e6a6dafd0e91 Mon Sep 17 00:00:00 2001 From: LiFeng Date: Sat, 11 Apr 2020 19:16:42 +0800 Subject: [PATCH 05/49] cgroup: refact cgroup implemt Signed-off-by: LiFeng --- src/lxc/cgroups/cgfsng.c | 816 ++++++++++++++++++++++++++++++++++++++++++++++- src/lxc/cgroups/cgroup.c | 4 +- src/lxc/cgroups/cgroup.h | 9 +- src/lxc/conf.c | 12 + src/lxc/utils.c | 30 +- src/lxc/utils.h | 4 + 6 files changed, 869 insertions(+), 6 deletions(-) diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c index d3595bc..881dd39 100644 --- a/src/lxc/cgroups/cgfsng.c +++ b/src/lxc/cgroups/cgfsng.c @@ -1045,6 +1045,13 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops, SYSWARN("Failed to destroy cgroups"); } +#ifdef HAVE_ISULAD /* iSulad build: monitor cgroup teardown is a no-op */ +__cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops, + struct lxc_handler *handler) +{ + return; +} +#else __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops, struct lxc_handler *handler) { @@ -1117,6 +1124,7 @@ try_lxc_rm_rf: WARN("Failed to destroy \"%s\"", h->monitor_full_path); } } +#endif static int mkdir_eexist_on_last(const char *dir, mode_t mode) { @@ -1202,6 +1210,13 @@ static void cgroup_tree_leaf_remove(struct hierarchy *h, bool payload) SYSWARN("Failed to rmdir(\"%s\") cgroup", full_path); } +#ifdef HAVE_ISULAD /* iSulad build: monitor cgroup creation is skipped and reported as success */ +__cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, + struct lxc_handler *handler) +{ + return true; +} +#else __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, struct lxc_handler *handler) { @@ -1276,7 +1291,227 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, ops->monitor_cgroup = move_ptr(monitor_cgroup); return log_info(true, "The monitor process uses \"%s\" as cgroup", ops->monitor_cgroup); } +#endif + +#ifdef HAVE_ISULAD +/* If path/file contains only "\n", fill it with the contents of the same file in path's parent directory; returns false on failure. */ +static bool isulad_copy_parent_file(char *path, char *file) +{ + int ret; + int len = 0; + char *value = NULL; + char *current = NULL; 
+ char *fpath = NULL; + char *lastslash = NULL; + char oldv; + + fpath = must_make_path(path, file, NULL); + current = read_file(fpath); + + if (current == NULL) { + SYSERROR("Failed to read file \"%s\"", fpath); + free(fpath); + return false; + } + + if (strcmp(current, "\n") != 0) { /* file already has a value: nothing to copy */ + free(fpath); + free(current); + return true; + } + + free(fpath); + free(current); + + lastslash = strrchr(path, '/'); + if (lastslash == NULL) { + ERROR("Failed to detect \"/\" in \"%s\"", path); + return false; + } + oldv = *lastslash; + *lastslash = '\0'; /* temporarily cut path down to its parent directory */ + fpath = must_make_path(path, file, NULL); + *lastslash = oldv; + len = lxc_read_from_file(fpath, NULL, 0); + if (len <= 0) + goto on_error; + + value = must_realloc(NULL, len + 1); + ret = lxc_read_from_file(fpath, value, len); + if (ret != len) + goto on_error; + free(fpath); + value[len] = '\0'; /* terminate: value is logged with %s if the write below fails */ + fpath = must_make_path(path, file, NULL); + ret = lxc_write_to_file(fpath, value, len, false, 0666); + if (ret < 0) + SYSERROR("Failed to write \"%s\" to file \"%s\"", value, fpath); + free(fpath); + free(value); + return ret >= 0; + +on_error: + SYSERROR("Failed to read file \"%s\"", fpath); + free(fpath); + free(value); + return false; +} +/* Create cgpath (an existing directory is fine) and copy cpuset.cpus / cpuset.mems from its parent where they hold only a newline. */ +static bool build_sub_cpuset_cgroup_dir(char *cgpath) +{ + int ret; + + ret = mkdir_p(cgpath, 0755); + if (ret < 0) { + if (errno != EEXIST) { + SYSERROR("Failed to create directory \"%s\"", cgpath); + return false; + } + } + + /* copy parent's settings */ + if (!isulad_copy_parent_file(cgpath, "cpuset.cpus")) { + SYSERROR("Failed to copy \"cpuset.cpus\" settings"); + return false; + } + + /* copy parent's settings */ + if (!isulad_copy_parent_file(cgpath, "cpuset.mems")) { + SYSERROR("Failed to copy \"cpuset.mems\" settings"); + return false; + } + + return true; +} +/* No-op unless h lists the cpuset controller; otherwise prepare every prefix of cgname, then cgname itself, with build_sub_cpuset_cgroup_dir(). */ +static bool isulad_cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h, char *cgname) +{ + char *cgpath, *slash; + bool sub_mk_success = false; + + if (!string_in_list(h->controllers, "cpuset")) + return true; + + cgname += strspn(cgname, 
"/"); + + slash = strchr(cgname, '/'); + + if (slash != NULL) { + while (slash) { + *slash = '\0'; /* cut cgname at this '/' to address the intermediate cgroup */ + cgpath = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL); + sub_mk_success = build_sub_cpuset_cgroup_dir(cgpath); + free(cgpath); + *slash = '/'; + if (!sub_mk_success) { + return false; + } + slash = strchr(slash + 1, '/'); + } + } + + cgpath = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL); + sub_mk_success = build_sub_cpuset_cgroup_dir(cgpath); + free(cgpath); + if (!sub_mk_success) { + return false; + } + + return true; +} +/* Create dir one path prefix at a time; EEXIST is ignored for every component, any other mkdir error returns -1. */ +static int isulad_mkdir_eexist_on_last(const char *dir, mode_t mode) +{ + const char *tmp = dir; + const char *orig = dir; + + do { + int ret; + size_t cur_len; + char *makeme; + + dir = tmp + strspn(tmp, "/"); + tmp = dir + strcspn(dir, "/"); + + errno = ENOMEM; + cur_len = dir - orig; + makeme = strndup(orig, cur_len); + if (!makeme) + return -1; + + ret = mkdir(makeme, mode); + if (ret < 0) { + if (errno != EEXIST) { + SYSERROR("Failed to create directory \"%s\"", makeme); + free(makeme); + return -1; + } + } + free(makeme); + + } while (tmp != dir); + + return 0; +} +/* Create cgroup cgname in hierarchy h and open it as h->cgfd_con; an already existing path is an error that is also reported on errfd. */ +static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname, int errfd) +{ + int ret; + __do_free char *path = NULL; + + path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL); + + if (file_exists(path)) { // it must not already exist + ERROR("Cgroup path \"%s\" already exist.", path); + lxc_write_error_message(errfd, "%s:%d: Cgroup path \"%s\" already exist.", + __FILE__, __LINE__, path); + return false; + } + + if (!isulad_cg_legacy_handle_cpuset_hierarchy(h, cgname)) { + ERROR("Failed to handle legacy cpuset controller"); + return false; + } + + ret = isulad_mkdir_eexist_on_last(path, 0755); + if (ret < 0) { + ERROR("Failed to create cgroup \"%s\"", path); + return false; + } + + h->cgfd_con = lxc_open_dirfd(path); + if (h->cgfd_con < 0) + return log_error_errno(false, errno, "Failed to open %s", 
path); + + if (h->container_full_path == NULL) { + h->container_full_path = move_ptr(path); + } + + return true; +} + +/* isulad: create hierarchies path, if fail, return the error */ +__cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops, + struct lxc_handler *handler) +{ + int i; + char *container_cgroup = ops->container_cgroup; + + if (!container_cgroup) { + ERROR("cgfsng_create container_cgroup is invalid"); + return false; + } + + for (i = 0; ops->hierarchies[i]; i++) { + if (!create_path_for_hierarchy(ops->hierarchies[i], container_cgroup, ops->errfd)) { + SYSERROR("Failed to create %s", ops->hierarchies[i]->container_full_path ? ops->hierarchies[i]->container_full_path : "(null)"); /* the path is only stored on success */ + return false; + } + } + + return true; +} +#else /* * Try to create the same cgroup in all hierarchies. Start with cgroup_pattern; * next cgroup_pattern-1, -2, ..., -999. @@ -1356,7 +1591,15 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops, INFO("The container process uses \"%s\" as cgroup", ops->container_cgroup); return true; } +#endif +#ifdef HAVE_ISULAD /* iSulad build: entering the monitor cgroup is a no-op that reports success */ +__cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, + struct lxc_handler *handler) +{ + return true; +} +#else __cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, struct lxc_handler *handler) { @@ -1408,7 +1651,58 @@ __cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, return true; } +#endif + +#ifdef HAVE_ISULAD /* iSulad build: write handler->pid to cgroup.procs of every hierarchy, retrying failed writes */ +__cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops, + struct lxc_handler *handler) +{ + int len; + char pidstr[INTTYPE_TO_STRLEN(pid_t)]; + + if (!ops) + return ret_set_errno(false, ENOENT); + + if (!ops->hierarchies) + return true; + + if (!ops->container_cgroup) + return ret_set_errno(false, ENOENT); + + if (!handler || !handler->conf) + return ret_set_errno(false, EINVAL); + + len = snprintf(pidstr, sizeof(pidstr), "%d", handler->pid); + + for (int i = 0; ops->hierarchies[i]; i++) { + int ret; + char *fullpath; + int retry_count = 0; + int 
max_retry = 10; + fullpath = must_make_path(ops->hierarchies[i]->container_full_path, + "cgroup.procs", NULL); +retry: + ret = lxc_write_to_file(fullpath, pidstr, len, false, 0666); + if (ret != 0) { + if (retry_count < max_retry) { /* rebuild the cpuset chain and the cgroup directory, wait, then try again */ + SYSERROR("Failed to enter cgroup \"%s\" with retry count:%d", fullpath, retry_count); + (void)isulad_cg_legacy_handle_cpuset_hierarchy(ops->hierarchies[i], ops->container_cgroup); + (void)isulad_mkdir_eexist_on_last(ops->hierarchies[i]->container_full_path, 0755); + usleep(100 * 1000); /* 100 millisecond */ + retry_count++; + goto retry; + } + SYSERROR("Failed to enter cgroup \"%s\"", fullpath); + free(fullpath); + return false; + } + free(fullpath); + } + + return true; +} +#else __cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops, struct lxc_handler *handler) { @@ -1440,6 +1734,7 @@ __cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops, return true; } +#endif static int fchowmodat(int dirfd, const char *path, uid_t chown_uid, gid_t chown_gid, mode_t chmod_mode) @@ -1687,6 +1982,167 @@ static inline int cg_mount_cgroup_full(int type, struct hierarchy *h, return __cg_mount_direct(type, h, controllerpath); } +#ifdef HAVE_ISULAD /* iSulad build: mount a tmpfs at root/sys/fs/cgroup and populate it with one directory per hierarchy */ +__cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, + struct lxc_handler *handler, + const char *root, int type) +{ + int i, ret; + char *tmpfspath = NULL; + bool has_cgns = false, retval = false, wants_force_mount = false; + char **merged = NULL; + + if ((type & LXC_AUTO_CGROUP_MASK) == 0) + return true; + + if (type & LXC_AUTO_CGROUP_FORCE) { + type &= ~LXC_AUTO_CGROUP_FORCE; + wants_force_mount = true; + } + + if (!wants_force_mount) { + if (!lxc_list_empty(&handler->conf->keepcaps)) + wants_force_mount = !in_caplist(CAP_SYS_ADMIN, &handler->conf->keepcaps); + else + wants_force_mount = in_caplist(CAP_SYS_ADMIN, &handler->conf->caps); + } + + has_cgns = cgns_supported(); + if (has_cgns && !wants_force_mount) + return true; + + if (type == LXC_AUTO_CGROUP_NOSPEC) + 
type = LXC_AUTO_CGROUP_MIXED; + else if (type == LXC_AUTO_CGROUP_FULL_NOSPEC) + type = LXC_AUTO_CGROUP_FULL_MIXED; + + /* Mount tmpfs */ + tmpfspath = must_make_path(root, "/sys/fs/cgroup", NULL); + if (mkdir_p(tmpfspath, 0755) < 0) { + ERROR("Failed to create directory: %s", tmpfspath); + goto on_error; + } + ret = safe_mount(NULL, tmpfspath, "tmpfs", + MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, + "size=10240k,mode=755", root); + if (ret < 0) + goto on_error; + + for (i = 0; ops->hierarchies[i]; i++) { + char *controllerpath = NULL; + char *path2 = NULL; + struct hierarchy *h = ops->hierarchies[i]; + char *controller = strrchr(h->mountpoint, '/'); + + if (!controller) + continue; + controller++; /* last path component of the mountpoint */ + + // isulad: symlink subcgroup + if (strchr(controller, ',') != NULL) { + int pret; + pret = lxc_append_string(&merged, controller); + if (pret < 0) + goto on_error; + } + + controllerpath = must_make_path(tmpfspath, controller, NULL); + if (dir_exists(controllerpath)) { + free(controllerpath); + continue; + } + + ret = mkdir(controllerpath, 0755); + if (ret < 0) { + SYSERROR("Error creating cgroup path: %s", controllerpath); + free(controllerpath); + goto on_error; + } + + if (has_cgns && wants_force_mount) { + /* If cgroup namespaces are supported but the container + * will not have CAP_SYS_ADMIN after it has started we + * need to mount the cgroups manually. 
+ */ + ret = cg_mount_in_cgroup_namespace(type, h, controllerpath); + free(controllerpath); + if (ret < 0) + goto on_error; + + continue; + } + + ret = cg_mount_cgroup_full(type, h, controllerpath); + if (ret < 0) { + free(controllerpath); + goto on_error; + } + + if (!cg_mount_needs_subdirs(type)) { + free(controllerpath); + continue; + } + + // isulad: ignore ops->container_cgroup so we will not see directory lxc after /sys/fs/cgroup/xxx in container, + // isulad: ignore h->container_base_path so we will not see subgroup of /sys/fs/cgroup/xxx/subgroup in container + path2 = must_make_path(controllerpath, NULL); + ret = mkdir_p(path2, 0755); + if (ret < 0) { + free(controllerpath); + free(path2); + goto on_error; + } + + ret = cg_legacy_mount_controllers(type, h, controllerpath, + path2, ops->container_cgroup); + free(controllerpath); + free(path2); + if (ret < 0) + goto on_error; + } + + // isulad: symlink subcgroup + if (merged) { /* for each co-mounted "a,b" directory, symlink every single controller name to it */ + char **mc = NULL; + for (mc = merged; *mc; mc++) { + char *token = NULL; + char *copy = must_copy_string(*mc); + lxc_iterate_parts(token, copy, ",") { + int mret; + char *link; + link = must_make_path(tmpfspath, token, NULL); + mret = symlink(*mc, link); + if (mret < 0 && errno != EEXIST) { + SYSERROR("Failed to create link %s for target %s", link, *mc); + free(copy); + free(link); + goto on_error; + } + free(link); + } + free(copy); + } + } + + + // isulad: remount /sys/fs/cgroup to readonly + if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_RO) { + ret = mount(tmpfspath, tmpfspath, "bind", + MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME|MS_RDONLY|MS_BIND|MS_REMOUNT, NULL); + if (ret < 0) { + SYSERROR("Failed to remount /sys/fs/cgroup."); + goto on_error; + } + } + + retval = true; + +on_error: + free(tmpfspath); + lxc_free_array((void **)merged, free); + return retval; +} +#else __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, struct lxc_handler *handler, const char *root, int type) @@ -1799,6 +2255,7 @@ 
__cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, return true; } +#endif /* Only root needs to escape to the cgroup of its init. */ __cgfsng_ops static bool cgfsng_escape(const struct cgroup_ops *ops, @@ -2054,6 +2511,24 @@ __cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops, : NULL; } +#ifdef HAVE_ISULAD /* returns h->container_full_path for controller's hierarchy, or NULL (ENOENT) when no hierarchy matches */ +__cgfsng_ops static const char *cgfsng_get_cgroup_full_path(struct cgroup_ops *ops, + const char *controller) +{ + struct hierarchy *h; + + h = get_hierarchy(ops, controller); + if (!h) + return log_warn_errno(NULL, ENOENT, "Failed to find hierarchy for controller \"%s\"", + controller ? controller : "(null)"); + + if (!h->container_full_path) /* build the path on first use and keep it on the hierarchy */ + h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, ops->container_cgroup, NULL); + + return h->container_full_path; +} +#endif + /* Given a cgroup path returned from lxc_cmd_get_cgroup_path, build a full path, * which must be freed by the caller. */ @@ -2360,6 +2835,44 @@ __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops, return true; } +#ifdef HAVE_ISULAD /* iSulad build: the cgroup is resolved through ops->get_cgroup(); name and lxcpath are not used */ +__cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename, + char *value, size_t len, const char *name, + const char *lxcpath) +{ + int ret = -1; + size_t controller_len; + char *controller, *p, *path; + struct hierarchy *h; + + controller_len = strlen(filename); + controller = alloca(controller_len + 1); + (void)strlcpy(controller, filename, controller_len + 1); + + p = strchr(controller, '.'); + if (p) + *p = '\0'; /* keep only the controller name in front of the first '.' */ + + const char *ori_path = ops->get_cgroup(ops, controller); + if (ori_path == NULL) { + ERROR("Failed to get cgroup path:%s", controller); + return -1; + } + path = safe_strdup(ori_path); + + h = get_hierarchy(ops, controller); + if (h) { /* ret stays -1 when no hierarchy provides the controller */ + char *fullpath; + + fullpath = build_full_cgpath_from_monitorpath(h, path, filename); + ret = lxc_read_from_file(fullpath, value, len); + free(fullpath); + } + free(path); + + return ret; +} +#else /* Called externally (i.e. 
from 'lxc-cgroup') to query cgroup limits. Here we * don't have a cgroup_data set up, so we ask the running container through the * commands API for the cgroup path. @@ -2397,6 +2910,7 @@ __cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename, return ret; } +#endif static int device_cgroup_parse_access(struct device_item *device, const char *val) { @@ -2510,6 +3024,44 @@ static int device_cgroup_rule_parse(struct device_item *device, const char *key, return device_cgroup_parse_access(device, ++val); } +#ifdef HAVE_ISULAD /* iSulad build: the cgroup is resolved through ops->get_cgroup(); name and lxcpath are not used */ +__cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops, + const char *filename, const char *value, + const char *name, const char *lxcpath) +{ + int ret = -1; + size_t controller_len; + char *controller, *p, *path; + struct hierarchy *h; + + controller_len = strlen(filename); + controller = alloca(controller_len + 1); + (void)strlcpy(controller, filename, controller_len + 1); + + p = strchr(controller, '.'); + if (p) + *p = '\0'; /* keep only the controller name in front of the first '.' */ + + const char *ori_path = ops->get_cgroup(ops, controller); + if (ori_path == NULL) { + ERROR("Failed to get cgroup path:%s", controller); + return -1; + } + path = safe_strdup(ori_path); + + h = get_hierarchy(ops, controller); + if (h) { /* ret stays -1 when no hierarchy provides the controller */ + char *fullpath; + + fullpath = build_full_cgpath_from_monitorpath(h, path, filename); + ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666); + free(fullpath); + } + free(path); + + return ret; +} +#else /* Called externally (i.e. from 'lxc-cgroup') to set new cgroup limits. Here we * don't have a cgroup_data set up, so we ask the running container through the * commands API for the cgroup path. 
@@ -2562,6 +3114,7 @@ __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops, return ret; } +#endif /* take devices cgroup line * /dev/foo rwx @@ -2686,6 +3239,199 @@ static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename, return lxc_write_openat(h->container_full_path, filename, value, strlen(value)); } +#ifdef HAVE_ISULAD +/* Read cgroup file filename below the hierarchy's container_full_path into value; len is + * the size handed to lxc_read_from_file(). Called from setup_limits, after we created the cgroups. + */ +static int isulad_cg_legacy_get_data(struct cgroup_ops *ops, const char *filename, + char *value, size_t len) +{ + char *fullpath = NULL; + char *p = NULL; + struct hierarchy *h = NULL; + int ret = 0; + char *controller = NULL; + + size_t namelen = strlen(filename); /* kept separate so that len remains the capacity of value */ + if (SIZE_MAX - 1 < namelen) { + errno = EINVAL; + return -1; + } + controller = calloc(1, namelen + 1); + if (controller == NULL) { + errno = ENOMEM; + return -1; + } + (void)strlcpy(controller, filename, namelen + 1); + + p = strchr(controller, '.'); + if (p) + *p = '\0'; + + + h = get_hierarchy(ops, controller); + if (!h) { + ERROR("Failed to setup limits for the \"%s\" controller. 
" + "The controller seems to be unused by \"cgfsng\" cgroup " + "driver or not enabled on the cgroup hierarchy", + controller); + errno = ENOENT; + free(controller); + return -ENOENT; + } + + fullpath = must_make_path(h->container_full_path, filename, NULL); + ret = lxc_read_from_file(fullpath, value, len); /* len is the caller's buffer size */ + free(fullpath); + free(controller); + return ret; +} +/* Write value to cgroup file filename below the hierarchy's container_full_path, retrying a failed write up to max_retry times. */ +static int isulad_cg_legacy_set_data(struct cgroup_ops *ops, const char *filename, + const char *value) +{ + size_t len; + char *fullpath, *p; + /* "b|c <2^64-1>:<2^64-1> r|w|m" = 47 chars max */ + char converted_value[50]; + struct hierarchy *h; + int ret = 0; + char *controller = NULL; + int retry_count = 0; + int max_retry = 10; + char *container_cgroup = ops->container_cgroup; + + len = strlen(filename); + controller = alloca(len + 1); + (void)strlcpy(controller, filename, len + 1); + + p = strchr(controller, '.'); + if (p) + *p = '\0'; + + if (strcmp("devices.allow", filename) == 0 && value[0] == '/') { + ret = convert_devpath(value, converted_value); + if (ret < 0) + return ret; + value = converted_value; + } + + h = get_hierarchy(ops, controller); + if (!h) { + ERROR("Failed to setup limits for the \"%s\" controller. 
" + "The controller seems to be unused by \"cgfsng\" cgroup " + "driver or not enabled on the cgroup hierarchy", + controller); + errno = ENOENT; + return -ENOENT; + } + + fullpath = must_make_path(h->container_full_path, filename, NULL); + +retry: + ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666); + if (ret != 0) { + if (retry_count < max_retry) { /* rebuild the cpuset chain and the cgroup directory, wait, then try again */ + SYSERROR("setting cgroup config for ready process caused \"failed to write %s to %s\".", value, fullpath); + (void)isulad_cg_legacy_handle_cpuset_hierarchy(h, container_cgroup); + (void)isulad_mkdir_eexist_on_last(h->container_full_path, 0755); + usleep(100 * 1000); /* 100 millisecond */ + retry_count++; + goto retry; + } + lxc_write_error_message(ops->errfd, + "%s:%d: setting cgroup config for ready process caused \"failed to write %s to %s: %s\".", + __FILE__, __LINE__, value, fullpath, strerror(errno)); + } + free(fullpath); + return ret; +} +/* iSulad build: apply conf->cgroup through isulad_cg_legacy_set_data() and check every cpu.shares setting by reading it back. */ +__cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops, + struct lxc_conf *conf, + bool do_devices) +{ + __do_free struct lxc_list *sorted_cgroup_settings = NULL; + struct lxc_list *cgroup_settings = &conf->cgroup; + struct lxc_list *iterator, *next; + struct lxc_cgroup *cg; + bool ret = false; + char value[21 + 1] = { 0 }; + long long int readvalue, setvalue; + + if (!ops) + return ret_set_errno(false, ENOENT); + + if (!conf) + return ret_set_errno(false, EINVAL); + + cgroup_settings = &conf->cgroup; + if (lxc_list_empty(cgroup_settings)) + return true; + + if (!ops->hierarchies) + return ret_set_errno(false, EINVAL); + + sorted_cgroup_settings = sort_cgroup_settings(cgroup_settings); + if (!sorted_cgroup_settings) + return false; + + lxc_list_for_each(iterator, sorted_cgroup_settings) { + cg = iterator->elem; + + if (do_devices == !strncmp("devices", cg->subsystem, 7)) { + if (isulad_cg_legacy_set_data(ops, cg->subsystem, cg->value)) { + if (do_devices && (errno == EACCES || errno == EPERM)) { + SYSWARN("Failed to set \"%s\" to 
\"%s\"", cg->subsystem, cg->value); + continue; + } + SYSERROR("Failed to set \"%s\" to \"%s\"", cg->subsystem, cg->value); + goto out; + } + DEBUG("Set controller \"%s\" set to \"%s\"", cg->subsystem, cg->value); + } + // isulad: check cpu shares + if (strcmp(cg->subsystem, "cpu.shares") == 0) { + if (isulad_cg_legacy_get_data(ops, cg->subsystem, value, sizeof(value) - 1) < 0) { + SYSERROR("Error get %s", cg->subsystem); + goto out; + } + trim(value); + if (lxc_safe_long_long(cg->value, &setvalue) != 0) { + SYSERROR("Invalid value %s", cg->value); + goto out; + } + if (lxc_safe_long_long(value, &readvalue) != 0) { + SYSERROR("Invalid value %s", value); + goto out; + } + if (setvalue > readvalue) { /* the value read back is lower than the one requested */ + ERROR("The maximum allowed cpu-shares is %s", value); + lxc_write_error_message(ops->errfd, + "%s:%d: setting cgroup config for ready process caused \"The maximum allowed cpu-shares is %s\".", + __FILE__, __LINE__, value); + goto out; + } else if (setvalue < readvalue) { + ERROR("The minimum allowed cpu-shares is %s", value); + lxc_write_error_message(ops->errfd, + "%s:%d: setting cgroup config for ready process caused \"The minimum allowed cpu-shares is %s\".", + __FILE__, __LINE__, value); + goto out; + } + } + } + + ret = true; + INFO("Limits for the legacy cgroup hierarchies have been setup"); +out: + lxc_list_for_each_safe(iterator, sorted_cgroup_settings, next) { + lxc_list_del(iterator); + free(iterator); + } + + return ret; +} +#else __cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops, struct lxc_conf *conf, bool do_devices) @@ -2739,6 +3485,7 @@ out: return ret; } +#endif /* * Some of the parsing logic comes from the original cgroup device v1 @@ -2950,6 +3697,12 @@ bool __cgfsng_delegate_controllers(struct cgroup_ops *ops, const char *cgroup) return true; } +#ifdef HAVE_ISULAD /* iSulad build: monitor delegation is a no-op that reports success */ +__cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops) +{ + return true; +} +#else __cgfsng_ops bool 
cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops) { if (!ops) @@ -2957,6 +3710,7 @@ __cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops) return __cgfsng_delegate_controllers(ops, ops->monitor_cgroup); } +#endif __cgfsng_ops bool cgfsng_payload_delegate_controllers(struct cgroup_ops *ops) { @@ -3107,6 +3861,22 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg trim(base_cgroup); prune_init_scope(base_cgroup); +#ifdef HAVE_ISULAD + /* isulad: skip the writeability test. When isulad runs in docker without a cgroup namespace + * the base_cgroup will be docker/XXX.. and mountpoint+base_cgroup may not exist */ + + /* + * Reason: base_cgroup may start with /system.slice when cg_hybrid_init + * reads /proc/1/cgroup on the host, and cgroup init would then put every + * container's cgroup path under /sys/fs/cgroup/<controller>/system.slice/xxx/lxc, + * which is not consistent with docker. The default cgroup path + * should be under /sys/fs/cgroup/<controller>/lxc, so base_cgroup is cut down to "/". 
+ */ + + if (strlen(base_cgroup) > 1 && base_cgroup[0] == '/') { + base_cgroup[1] = '\0'; + } +#else if (type == CGROUP2_SUPER_MAGIC) writeable = test_writeable_v2(mountpoint, base_cgroup); else @@ -3115,7 +3885,7 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg TRACE("The %s group is not writeable", base_cgroup); continue; } - +#endif if (type == CGROUP2_SUPER_MAGIC) { char *cgv2_ctrl_path; @@ -3268,7 +4038,45 @@ static int cg_init(struct cgroup_ops *ops, struct lxc_conf *conf) return cg_hybrid_init(ops, relative, !lxc_list_empty(&conf->id_map)); } -__cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops) +#ifdef HAVE_ISULAD /* iSulad build: data_init additionally derives ops->container_cgroup from conf */ +__cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops, struct lxc_conf *conf) +{ + const char *cgroup_pattern; + const char *cgroup_tree; + __do_free char *container_cgroup = NULL, *__cgroup_tree = NULL; + size_t len; + + if (!ops) + return ret_set_errno(-1, ENOENT); + + /* copy system-wide cgroup information */ + cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern"); + if (cgroup_pattern && strcmp(cgroup_pattern, "") != 0) + ops->cgroup_pattern = must_copy_string(cgroup_pattern); + + if (conf->cgroup_meta.dir) { + cgroup_tree = conf->cgroup_meta.dir; + container_cgroup = must_concat(&len, cgroup_tree, "/", conf->name, NULL); + } else if (ops->cgroup_pattern) { + __cgroup_tree = lxc_string_replace("%n", conf->name, ops->cgroup_pattern); + if (!__cgroup_tree) + return ret_set_errno(-1, ENOMEM); + + cgroup_tree = __cgroup_tree; + container_cgroup = must_concat(&len, cgroup_tree, NULL); + } else { + cgroup_tree = NULL; + container_cgroup = must_concat(&len, conf->name, NULL); + } + if (!container_cgroup) + return ret_set_errno(-1, ENOMEM); + + ops->container_cgroup = move_ptr(container_cgroup); + + return 0; +} +#else +__cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops, struct lxc_conf *conf) { const char *cgroup_pattern; @@ -3282,6 +4090,7 @@ __cgfsng_ops static int 
cgfsng_data_init(struct cgroup_ops *ops) return 0; } +#endif struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf) { @@ -3311,6 +4120,9 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf) cgfsng_ops->num_hierarchies = cgfsng_num_hierarchies; cgfsng_ops->get_hierarchies = cgfsng_get_hierarchies; cgfsng_ops->get_cgroup = cgfsng_get_cgroup; +#ifdef HAVE_ISULAD + cgfsng_ops->get_cgroup_full_path = cgfsng_get_cgroup_full_path; +#endif cgfsng_ops->get = cgfsng_get; cgfsng_ops->set = cgfsng_set; cgfsng_ops->freeze = cgfsng_freeze; diff --git a/src/lxc/cgroups/cgroup.c b/src/lxc/cgroups/cgroup.c index 37fd0e3..ad46d5c 100644 --- a/src/lxc/cgroups/cgroup.c +++ b/src/lxc/cgroups/cgroup.c @@ -31,7 +31,7 @@ struct cgroup_ops *cgroup_init(struct lxc_conf *conf) if (!cgroup_ops) return log_error_errno(NULL, errno, "Failed to initialize cgroup driver"); - if (cgroup_ops->data_init(cgroup_ops)) { + if (cgroup_ops->data_init(cgroup_ops, conf)) { cgroup_exit(cgroup_ops); return log_error_errno(NULL, errno, "Failed to initialize cgroup data"); @@ -79,7 +79,7 @@ void cgroup_exit(struct cgroup_ops *ops) free((*it)->container_base_path); free((*it)->container_full_path); free((*it)->monitor_full_path); - if ((*it)->cgfd_mon >= 0) + if ((*it)->cgfd_con >= 0) /* test the descriptor that is closed on the next line */ close((*it)->cgfd_con); if ((*it)->cgfd_mon >= 0) close((*it)->cgfd_mon); diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h index 1e08a01..dcdc76b 100644 --- a/src/lxc/cgroups/cgroup.h +++ b/src/lxc/cgroups/cgroup.h @@ -102,6 +102,10 @@ struct cgroup_ops { char *container_cgroup; char *monitor_cgroup; +#ifdef HAVE_ISULAD + int errfd; /* descriptor handed to lxc_write_error_message() when cgroup errors are reported */ +#endif + /* @hierarchies * - A NULL-terminated array of struct hierarchy, one per legacy * hierarchy. No duplicates. 
First sufficient, writeable mounted @@ -139,7 +143,7 @@ struct cgroup_ops { */ cgroup_layout_t cgroup_layout; - int (*data_init)(struct cgroup_ops *ops); + int (*data_init)(struct cgroup_ops *ops, struct lxc_conf *conf); void (*payload_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler); void (*monitor_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler); bool (*monitor_create)(struct cgroup_ops *ops, struct lxc_handler *handler); @@ -147,6 +151,9 @@ struct cgroup_ops { bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler *handler); bool (*payload_enter)(struct cgroup_ops *ops, struct lxc_handler *handler); const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller); +#ifdef HAVE_ISULAD + const char *(*get_cgroup_full_path)(struct cgroup_ops *ops, const char *controller); /* full container cgroup path for controller, NULL if unknown */ +#endif bool (*escape)(const struct cgroup_ops *ops, struct lxc_conf *conf); int (*num_hierarchies)(struct cgroup_ops *ops); bool (*get_hierarchies)(struct cgroup_ops *ops, int n, char ***out); diff --git a/src/lxc/conf.c b/src/lxc/conf.c index e3fce51..e806605 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -637,8 +637,13 @@ static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_ha { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL }, { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL }, { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL }, + #ifdef HAVE_ISULAD /* iSulad build: the rw and ro sysfs mounts also get MS_NODEV|MS_NOEXEC|MS_NOSUID */ + { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL }, + { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY|MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL }, + #else { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL }, { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL }, + 
#endif { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys", "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL }, { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys", "%r/sys", NULL, MS_BIND, NULL }, { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL }, @@ -670,6 +675,13 @@ static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_ha if (!destination) return -1; +#ifdef HAVE_ISULAD + if (mkdir_p(destination, 0755) < 0) { /* create the mount target before mounting onto it */ + SYSERROR("Failed to create mount target '%s'", destination); + return log_error(-1, "Failed to mkdir destination %s", destination); + } +#endif + mflags = add_required_remount_flags(source, destination, default_mounts[i].flags); r = safe_mount(source, destination, default_mounts[i].fstype, diff --git a/src/lxc/utils.c b/src/lxc/utils.c index 2cf9994..160b3db 100644 --- a/src/lxc/utils.c +++ b/src/lxc/utils.c @@ -1755,8 +1755,13 @@ int lxc_rm_rf(const char *dirname) struct dirent *direntp; dir = opendir(dirname); - if (!dir) + if (!dir) { + if (errno == ENOENT) { /* nothing to remove: warn and report success */ + WARN("Destroy path: \"%s\" do not exist", dirname); + return 0; + } return log_error_errno(-1, errno, "Failed to open dir \"%s\"", dirname); + } while ((direntp = readdir(dir))) { __do_free char *pathname = NULL; @@ -1904,3 +1909,26 @@ int fix_stdio_permissions(uid_t uid) return fret; } + +#ifdef HAVE_ISULAD +/* isulad: format a message into a BUFSIZ buffer and write it to errfd; does nothing when errfd <= 0 */ +void lxc_write_error_message(int errfd, const char *format, ...) 
+{ + int ret; + char errbuf[BUFSIZ + 1] = {0}; + ssize_t sret; + va_list argp; + + if (errfd <= 0) + return; + + va_start(argp, format); + ret = vsnprintf(errbuf, BUFSIZ, format, argp); + va_end(argp); + if (ret < 0 || ret >= BUFSIZ) /* an error or truncation is only logged; the buffer is still written */ + SYSERROR("Failed to call vsnprintf"); + sret = write(errfd, errbuf, strlen(errbuf)); + if (sret < 0) + SYSERROR("Write errbuf failed"); +} +#endif diff --git a/src/lxc/utils.h b/src/lxc/utils.h index 7b36133..3c30565 100644 --- a/src/lxc/utils.h +++ b/src/lxc/utils.h @@ -244,4 +244,8 @@ extern bool lxc_can_use_pidfd(int pidfd); extern int fix_stdio_permissions(uid_t uid); +#ifdef HAVE_ISULAD +extern void lxc_write_error_message(int errfd, const char *format, ...); +#endif + #endif /* __LXC_UTILS_H */ -- 1.8.3.1