lxc/0005-cgroup-refact-cgroup-implemt.patch
lifeng 8966f1fe72 lxc: update lxc to 4.0.1
Signed-off-by: lifeng <lifeng68@huawei.com>
2020-04-23 19:30:12 +08:00

1150 lines
34 KiB
Diff

From 41bc2b03f1c143352b025432a955e6a6dafd0e91 Mon Sep 17 00:00:00 2001
From: LiFeng <lifeng68@huawei.com>
Date: Sat, 11 Apr 2020 19:16:42 +0800
Subject: [PATCH 05/49] cgroup: refact cgroup implemt
Signed-off-by: LiFeng <lifeng68@huawei.com>
---
src/lxc/cgroups/cgfsng.c | 816 ++++++++++++++++++++++++++++++++++++++++++++++-
src/lxc/cgroups/cgroup.c | 4 +-
src/lxc/cgroups/cgroup.h | 9 +-
src/lxc/conf.c | 12 +
src/lxc/utils.c | 30 +-
src/lxc/utils.h | 4 +
6 files changed, 869 insertions(+), 6 deletions(-)
diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index d3595bc..881dd39 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -1045,6 +1045,13 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
SYSWARN("Failed to destroy cgroups");
}
+#ifdef HAVE_ISULAD
+__cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
+ struct lxc_handler *handler)
+{
+ return; /* isulad: nothing to tear down; the HAVE_ISULAD cgfsng_monitor_create() creates no monitor cgroup */
+}
+#else
__cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
struct lxc_handler *handler)
{
@@ -1117,6 +1124,7 @@ try_lxc_rm_rf:
WARN("Failed to destroy \"%s\"", h->monitor_full_path);
}
}
+#endif
static int mkdir_eexist_on_last(const char *dir, mode_t mode)
{
@@ -1202,6 +1210,13 @@ static void cgroup_tree_leaf_remove(struct hierarchy *h, bool payload)
SYSWARN("Failed to rmdir(\"%s\") cgroup", full_path);
}
+#ifdef HAVE_ISULAD
+__cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
+ struct lxc_handler *handler)
+{
+ return true; /* isulad: no monitor cgroup is created in this build; always reports success */
+}
+#else
__cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
struct lxc_handler *handler)
{
@@ -1276,7 +1291,227 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
ops->monitor_cgroup = move_ptr(monitor_cgroup);
return log_info(true, "The monitor process uses \"%s\" as cgroup", ops->monitor_cgroup);
}
+#endif
+
+#ifdef HAVE_ISULAD
+
+/*
+ * If @file in cgroup directory @path is still empty (reads back as "\n"),
+ * fill it with the content of the same file in the parent directory.
+ * @path is cut at its last '/' temporarily and restored before returning.
+ * Returns true if nothing had to be copied or the copy succeeded.
+ */
+static bool isulad_copy_parent_file(char *path, char *file)
+{
+	int ret, len = 0;
+	char *value = NULL, *current = NULL;
+	char *fpath = NULL, *lastslash = NULL;
+
+	fpath = must_make_path(path, file, NULL);
+	current = read_file(fpath);
+	if (current == NULL) {
+		SYSERROR("Failed to read file \"%s\"", fpath);
+		free(fpath);
+		return false;
+	}
+
+	/* Anything other than a lone newline means the file is already populated. */
+	ret = strcmp(current, "\n");
+	free(fpath);
+	free(current);
+	if (ret != 0)
+		return true;
+
+	lastslash = strrchr(path, '/');
+	if (lastslash == NULL) {
+		ERROR("Failed to detect \"/\" in \"%s\"", path);
+		return false;
+	}
+	/* Drop the last component so fpath names the parent's copy of @file. */
+	*lastslash = '\0';
+	fpath = must_make_path(path, file, NULL);
+	*lastslash = '/';
+	len = lxc_read_from_file(fpath, NULL, 0);
+	if (len <= 0)
+		goto on_error;
+
+	value = must_realloc(NULL, len + 1);
+	ret = lxc_read_from_file(fpath, value, len);
+	if (ret != len)
+		goto on_error;
+	value[len] = '\0'; /* value is printed with "%s" if the write below fails */
+	free(fpath);
+
+	fpath = must_make_path(path, file, NULL);
+	ret = lxc_write_to_file(fpath, value, len, false, 0666);
+	if (ret < 0)
+		SYSERROR("Failed to write \"%s\" to file \"%s\"", value, fpath);
+	free(fpath);
+	free(value);
+	return ret >= 0;
+
+on_error:
+	SYSERROR("Failed to read file \"%s\"", fpath);
+	free(fpath);
+	free(value);
+	return false;
+}
+
+/*
+ * Make sure the cpuset cgroup directory @cgpath exists, then run
+ * isulad_copy_parent_file() on its cpuset.cpus and cpuset.mems files.
+ * Returns false as soon as one of these steps fails.
+ */
+static bool build_sub_cpuset_cgroup_dir(char *cgpath)
+{
+	/* a directory that already exists is not an error */
+	if (mkdir_p(cgpath, 0755) < 0 && errno != EEXIST) {
+		SYSERROR("Failed to create directory \"%s\"", cgpath);
+		return false;
+	}
+
+	/* copy parent's settings */
+	if (!isulad_copy_parent_file(cgpath, "cpuset.cpus")) {
+		SYSERROR("Failed to copy \"cpuset.cpus\" settings");
+		return false;
+	}
+
+	if (!isulad_copy_parent_file(cgpath, "cpuset.mems")) {
+		SYSERROR("Failed to copy \"cpuset.mems\" settings");
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Prepare the cpuset hierarchy for container cgroup @cgname. Hierarchies
+ * without the cpuset controller are left alone. Otherwise every leading
+ * sub-path of @cgname (below mountpoint + container_base_path) and finally
+ * @cgname itself is passed to build_sub_cpuset_cgroup_dir(), top-down.
+ * @cgname is cut at each '/' temporarily and restored again.
+ * Returns false as soon as one level fails.
+ */
+static bool isulad_cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h, char *cgname)
+{
+	char *level, *sep;
+	bool ok;
+
+	if (!string_in_list(h->controllers, "cpuset"))
+		return true;
+
+	cgname += strspn(cgname, "/");
+
+	/* intermediate levels: everything up to each '/' in turn */
+	for (sep = strchr(cgname, '/'); sep; sep = strchr(sep + 1, '/')) {
+		*sep = '\0';
+		level = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
+		ok = build_sub_cpuset_cgroup_dir(level);
+		free(level);
+		*sep = '/';
+		if (!ok)
+			return false;
+	}
+
+	/* last level: the complete cgroup name */
+	level = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
+	ok = build_sub_cpuset_cgroup_dir(level);
+	free(level);
+	return ok;
+}
+
+/*
+ * Walk @dir one path component at a time and mkdir() every leading prefix
+ * with @mode, the last prefix being @dir itself. EEXIST is ignored at every
+ * level. Returns 0 on success, -1 on the first strndup() or mkdir() failure.
+ */
+static int isulad_mkdir_eexist_on_last(const char *dir, mode_t mode)
+{
+	const char *start = dir;
+	const char *comp = dir;
+	const char *end = dir;
+
+	for (;;) {
+		char *prefix;
+
+		comp = end + strspn(end, "/");
+		end = comp + strcspn(comp, "/");
+
+		/* so that a failing strndup() reports ENOMEM */
+		errno = ENOMEM;
+		prefix = strndup(start, comp - start);
+		if (!prefix)
+			return -1;
+
+		if (mkdir(prefix, mode) < 0 && errno != EEXIST) {
+			SYSERROR("Failed to create directory \"%s\"", prefix);
+			free(prefix);
+			return -1;
+		}
+		free(prefix);
+		if (end == comp)
+			return 0;
+	}
+}
+/* isulad: create the container cgroup @cgname in hierarchy @h and open a directory fd for it in h->cgfd_con. */
+static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname, int errfd)
+{
+ int ret;
+ __do_free char *path = NULL;
+ /* full path of the cgroup to create: mountpoint + base path + cgroup name */
+ path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
+
+ if (file_exists(path)) { // it must not already exist
+ ERROR("Cgroup path \"%s\" already exist.", path);
+ lxc_write_error_message(errfd, "%s:%d: Cgroup path \"%s\" already exist.",
+ __FILE__, __LINE__, path);
+ return false;
+ }
+ /* for a cpuset hierarchy this prepares cpuset.cpus/cpuset.mems level by level; a no-op otherwise */
+ if (!isulad_cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
+ ERROR("Failed to handle legacy cpuset controller");
+ return false;
+ }
+
+ ret = isulad_mkdir_eexist_on_last(path, 0755);
+ if (ret < 0) {
+ ERROR("Failed to create cgroup \"%s\"", path);
+ return false;
+ }
+
+ h->cgfd_con = lxc_open_dirfd(path);
+ if (h->cgfd_con < 0)
+ return log_error_errno(false, errno, "Failed to open %s", path);
+ /* keep a full path that was recorded earlier; otherwise hand path over to the hierarchy */
+ if (h->container_full_path == NULL) {
+ h->container_full_path = move_ptr(path);
+ }
+
+ return true;
+}
+/* isulad: create the container cgroup in every hierarchy; fail on the first error */
+__cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
+						      struct lxc_handler *handler)
+{
+	if (!ops || !ops->container_cgroup) {
+		ERROR("cgfsng_create container_cgroup is invalid");
+		return false;
+	}
+	/* nothing to create; same convention as cgfsng_payload_enter() */
+	if (!ops->hierarchies)
+		return true;
+
+	for (int i = 0; ops->hierarchies[i]; i++) {
+		/* container_full_path may still be NULL after a failure, so log the cgroup name */
+		if (!create_path_for_hierarchy(ops->hierarchies[i], ops->container_cgroup, ops->errfd)) {
+			SYSERROR("Failed to create %s", ops->container_cgroup);
+			return false;
+		}
+	}
+	return true;
+}
+#else
/*
* Try to create the same cgroup in all hierarchies. Start with cgroup_pattern;
* next cgroup_pattern-1, -2, ..., -999.
@@ -1356,7 +1591,15 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
INFO("The container process uses \"%s\" as cgroup", ops->container_cgroup);
return true;
}
+#endif
+#ifdef HAVE_ISULAD
+__cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops,
+ struct lxc_handler *handler)
+{
+ return true; /* isulad: no-op; this variant moves nothing into a monitor cgroup and always succeeds */
+}
+#else
__cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops,
struct lxc_handler *handler)
{
@@ -1408,7 +1651,58 @@ __cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops,
return true;
}
+#endif
+
+#ifdef HAVE_ISULAD
+/*
+ * isulad: write handler->pid to cgroup.procs of the container cgroup in every
+ * hierarchy. A failed write is retried up to 10 times, 100ms apart, after a
+ * best-effort attempt to re-create the cgroup path.
+ */
+__cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops,
+					      struct lxc_handler *handler)
+{
+	const int max_retry = 10;
+	char pidstr[INTTYPE_TO_STRLEN(pid_t)];
+	int len;
+
+	if (!ops)
+		return ret_set_errno(false, ENOENT);
+
+	if (!ops->hierarchies)
+		return true;
+
+	if (!ops->container_cgroup)
+		return ret_set_errno(false, ENOENT);
+
+	if (!handler || !handler->conf)
+		return ret_set_errno(false, EINVAL);
+
+	len = snprintf(pidstr, sizeof(pidstr), "%d", handler->pid);
+
+	for (int i = 0; ops->hierarchies[i]; i++) {
+		struct hierarchy *h = ops->hierarchies[i];
+		char *fullpath = must_make_path(h->container_full_path, "cgroup.procs", NULL);
+
+		for (int attempt = 0; lxc_write_to_file(fullpath, pidstr, len, false, 0666) != 0; attempt++) {
+			if (attempt >= max_retry) {
+				SYSERROR("Failed to enter cgroup \"%s\"", fullpath);
+				free(fullpath);
+				return false;
+			}
+			SYSERROR("Failed to enter cgroup \"%s\" with retry count:%d", fullpath, attempt);
+			/* best effort: rebuild the cgroup directories before trying again */
+			(void)isulad_cg_legacy_handle_cpuset_hierarchy(h, ops->container_cgroup);
+			(void)isulad_mkdir_eexist_on_last(h->container_full_path, 0755);
+			usleep(100 * 1000); /* 100 millisecond */
+		}
+		free(fullpath);
+	}
+
+	return true;
+}
+#else
__cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops,
struct lxc_handler *handler)
{
@@ -1440,6 +1734,7 @@ __cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops,
return true;
}
+#endif
static int fchowmodat(int dirfd, const char *path, uid_t chown_uid,
gid_t chown_gid, mode_t chmod_mode)
@@ -1687,6 +1982,167 @@ static inline int cg_mount_cgroup_full(int type, struct hierarchy *h,
return __cg_mount_direct(type, h, controllerpath);
}
+#ifdef HAVE_ISULAD
+__cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
+ struct lxc_handler *handler,
+ const char *root, int type)
+{
+ int i, ret;
+ char *tmpfspath = NULL;
+ bool has_cgns = false, retval = false, wants_force_mount = false;
+ char **merged = NULL;
+ /* isulad: build <root>/sys/fs/cgroup as a tmpfs with one directory per hierarchy plus symlinks for merged controllers */
+ if ((type & LXC_AUTO_CGROUP_MASK) == 0)
+ return true;
+ /* NOTE(review): handler->conf and ops->hierarchies are dereferenced below without NULL checks */
+ if (type & LXC_AUTO_CGROUP_FORCE) {
+ type &= ~LXC_AUTO_CGROUP_FORCE;
+ wants_force_mount = true;
+ }
+ /* not forced explicitly: force when CAP_SYS_ADMIN is missing from a non-empty keepcaps list, or present in conf->caps */
+ if (!wants_force_mount) {
+ if (!lxc_list_empty(&handler->conf->keepcaps))
+ wants_force_mount = !in_caplist(CAP_SYS_ADMIN, &handler->conf->keepcaps);
+ else
+ wants_force_mount = in_caplist(CAP_SYS_ADMIN, &handler->conf->caps);
+ }
+ /* with cgroup namespaces and no forced mount there is nothing to do here */
+ has_cgns = cgns_supported();
+ if (has_cgns && !wants_force_mount)
+ return true;
+ /* the NOSPEC types are mapped to their MIXED counterparts */
+ if (type == LXC_AUTO_CGROUP_NOSPEC)
+ type = LXC_AUTO_CGROUP_MIXED;
+ else if (type == LXC_AUTO_CGROUP_FULL_NOSPEC)
+ type = LXC_AUTO_CGROUP_FULL_MIXED;
+
+ /* Mount tmpfs */
+ tmpfspath = must_make_path(root, "/sys/fs/cgroup", NULL);
+ if (mkdir_p(tmpfspath, 0755) < 0) {
+ ERROR("Failed to create directory: %s", tmpfspath);
+ goto on_error;
+ }
+ ret = safe_mount(NULL, tmpfspath, "tmpfs",
+ MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
+ "size=10240k,mode=755", root);
+ if (ret < 0)
+ goto on_error;
+
+ for (i = 0; ops->hierarchies[i]; i++) {
+ char *controllerpath = NULL;
+ char *path2 = NULL;
+ struct hierarchy *h = ops->hierarchies[i];
+ char *controller = strrchr(h->mountpoint, '/');
+ /* the directory name inside the tmpfs is the last component of the hierarchy mountpoint */
+ if (!controller)
+ continue;
+ controller++;
+
+ // isulad: remember names that contain ',' (merged controllers) so each part can be symlinked later
+ if (strchr(controller, ',') != NULL) {
+ int pret;
+ pret = lxc_append_string(&merged, controller);
+ if (pret < 0)
+ goto on_error;
+ }
+
+ controllerpath = must_make_path(tmpfspath, controller, NULL);
+ if (dir_exists(controllerpath)) {
+ free(controllerpath);
+ continue;
+ }
+
+ ret = mkdir(controllerpath, 0755);
+ if (ret < 0) {
+ SYSERROR("Error creating cgroup path: %s", controllerpath);
+ free(controllerpath);
+ goto on_error;
+ }
+
+ if (has_cgns && wants_force_mount) {
+ /* If cgroup namespaces are supported but the container
+ * will not have CAP_SYS_ADMIN after it has started we
+ * need to mount the cgroups manually.
+ */
+ ret = cg_mount_in_cgroup_namespace(type, h, controllerpath);
+ free(controllerpath);
+ if (ret < 0)
+ goto on_error;
+
+ continue;
+ }
+
+ ret = cg_mount_cgroup_full(type, h, controllerpath);
+ if (ret < 0) {
+ free(controllerpath);
+ goto on_error;
+ }
+
+ if (!cg_mount_needs_subdirs(type)) {
+ free(controllerpath);
+ continue;
+ }
+
+ // isulad: ignore ops->container_cgroup so we will not see directory lxc after /sys/fs/cgroup/xxx in container,
+ // isulad: ignore h->container_base_path so we will not see subgroup of /sys/fs/cgroup/xxx/subgroup in container
+ path2 = must_make_path(controllerpath, NULL);
+ ret = mkdir_p(path2, 0755);
+ if (ret < 0) {
+ free(controllerpath);
+ free(path2);
+ goto on_error;
+ }
+
+ ret = cg_legacy_mount_controllers(type, h, controllerpath,
+ path2, ops->container_cgroup);
+ free(controllerpath);
+ free(path2);
+ if (ret < 0)
+ goto on_error;
+ }
+
+ // isulad: for every merged name "a,b" create symlinks "a" and "b" in the tmpfs that point to "a,b"
+ if (merged) {
+ char **mc = NULL;
+ for (mc = merged; *mc; mc++) {
+ char *token = NULL;
+ char *copy = must_copy_string(*mc);
+ lxc_iterate_parts(token, copy, ",") {
+ int mret;
+ char *link;
+ link = must_make_path(tmpfspath, token, NULL);
+ mret = symlink(*mc, link);
+ if (mret < 0 && errno != EEXIST) {
+ SYSERROR("Failed to create link %s for target %s", link, *mc);
+ free(copy);
+ free(link);
+ goto on_error;
+ }
+ free(link);
+ }
+ free(copy);
+ }
+ }
+
+
+ // isulad: remount /sys/fs/cgroup to readonly
+ if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_RO) {
+ ret = mount(tmpfspath, tmpfspath, "bind",
+ MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME|MS_RDONLY|MS_BIND|MS_REMOUNT, NULL);
+ if (ret < 0) {
+ SYSERROR("Failed to remount /sys/fs/cgroup.");
+ goto on_error;
+ }
+ }
+
+ retval = true;
+ /* success and failure share the cleanup below; retval tells them apart */
+on_error:
+ free(tmpfspath);
+ lxc_free_array((void **)merged, free);
+ return retval;
+}
+#else
__cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
struct lxc_handler *handler,
const char *root, int type)
@@ -1799,6 +2255,7 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
return true;
}
+#endif
/* Only root needs to escape to the cgroup of its init. */
__cgfsng_ops static bool cgfsng_escape(const struct cgroup_ops *ops,
@@ -2054,6 +2511,24 @@ __cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
: NULL;
}
+#ifdef HAVE_ISULAD
+/* isulad: full container cgroup path (mountpoint + base path + container cgroup) for @controller, built on first use; NULL if no hierarchy matches */
+__cgfsng_ops static const char *cgfsng_get_cgroup_full_path(struct cgroup_ops *ops,
+							    const char *controller)
+{
+	struct hierarchy *h = get_hierarchy(ops, controller);
+
+	if (!h)
+		return log_warn_errno(NULL, ENOENT, "Failed to find hierarchy for controller \"%s\"",
+				      controller ? controller : "(null)");
+
+	/* compute the path the first time it is requested and keep it in the hierarchy */
+	if (!h->container_full_path)
+		h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, ops->container_cgroup, NULL);
+	return h->container_full_path;
+}
+#endif
+
/* Given a cgroup path returned from lxc_cmd_get_cgroup_path, build a full path,
* which must be freed by the caller.
*/
@@ -2360,6 +2835,44 @@ __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops,
return true;
}
+#ifdef HAVE_ISULAD
+/*
+ * isulad: read cgroup file @filename of the container into @value; @len is
+ * passed on to lxc_read_from_file(). The controller is the part of @filename
+ * before the first '.'. @name and @lxcpath are not used by this variant.
+ */
+__cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename,
+				   char *value, size_t len, const char *name,
+				   const char *lxcpath)
+{
+	int ret = -1;
+	size_t bufsz = strlen(filename) + 1;
+	char *controller = alloca(bufsz);
+	char *dot, *path, *fullpath;
+	const char *ori_path;
+	struct hierarchy *h;
+
+	(void)strlcpy(controller, filename, bufsz);
+	dot = strchr(controller, '.');
+	if (dot)
+		*dot = '\0';
+
+	ori_path = ops->get_cgroup(ops, controller);
+	if (ori_path == NULL) {
+		ERROR("Failed to get cgroup path:%s", controller);
+		return -1;
+	}
+	path = safe_strdup(ori_path);
+	h = get_hierarchy(ops, controller);
+	if (h) {
+		fullpath = build_full_cgpath_from_monitorpath(h, path, filename);
+		ret = lxc_read_from_file(fullpath, value, len);
+		free(fullpath);
+	}
+	free(path);
+	return ret;
+}
+#else
/* Called externally (i.e. from 'lxc-cgroup') to query cgroup limits. Here we
* don't have a cgroup_data set up, so we ask the running container through the
* commands API for the cgroup path.
@@ -2397,6 +2910,7 @@ __cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename,
return ret;
}
+#endif
static int device_cgroup_parse_access(struct device_item *device, const char *val)
{
@@ -2510,6 +3024,44 @@ static int device_cgroup_rule_parse(struct device_item *device, const char *key,
return device_cgroup_parse_access(device, ++val);
}
+#ifdef HAVE_ISULAD
+/*
+ * isulad: write @value to cgroup file @filename of the container. The
+ * controller is the part of @filename before the first '.'. @name and
+ * @lxcpath are not used by this variant. Returns -1 if the lookup fails.
+ */
+__cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops,
+				   const char *filename, const char *value,
+				   const char *name, const char *lxcpath)
+{
+	int ret = -1;
+	size_t bufsz = strlen(filename) + 1;
+	char *controller = alloca(bufsz);
+	char *dot, *path, *fullpath;
+	const char *ori_path;
+	struct hierarchy *h;
+
+	(void)strlcpy(controller, filename, bufsz);
+	dot = strchr(controller, '.');
+	if (dot)
+		*dot = '\0';
+
+	ori_path = ops->get_cgroup(ops, controller);
+	if (ori_path == NULL) {
+		ERROR("Failed to get cgroup path:%s", controller);
+		return -1;
+	}
+	path = safe_strdup(ori_path);
+	h = get_hierarchy(ops, controller);
+	if (h) {
+		fullpath = build_full_cgpath_from_monitorpath(h, path, filename);
+		ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666);
+		free(fullpath);
+	}
+	free(path);
+	return ret;
+}
+#else
/* Called externally (i.e. from 'lxc-cgroup') to set new cgroup limits. Here we
* don't have a cgroup_data set up, so we ask the running container through the
* commands API for the cgroup path.
@@ -2562,6 +3114,7 @@ __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops,
return ret;
}
+#endif
/* take devices cgroup line
* /dev/foo rwx
@@ -2686,6 +3239,199 @@ static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename,
return lxc_write_openat(h->container_full_path, filename, value, strlen(value));
}
+#ifdef HAVE_ISULAD
+/* Called from setup_limits - here we have the container's cgroup_data because
+ * we created the cgroups. @len is the size handed to lxc_read_from_file().
+ */
+static int isulad_cg_legacy_get_data(struct cgroup_ops *ops, const char *filename,
+				     char *value, size_t len)
+{
+	char *fullpath = NULL;
+	char *p = NULL;
+	struct hierarchy *h = NULL;
+	int ret = 0;
+	char *controller = NULL;
+	/* kept separate from @len so the caller's buffer size is not clobbered */
+	size_t filename_len = strlen(filename);
+
+	if (SIZE_MAX - 1 < filename_len) {
+		errno = EINVAL;
+		return -1;
+	}
+	controller = calloc(1, filename_len + 1);
+	if (controller == NULL) {
+		errno = ENOMEM;
+		return -1;
+	}
+	(void)strlcpy(controller, filename, filename_len + 1);
+
+	p = strchr(controller, '.');
+	if (p)
+		*p = '\0';
+
+	h = get_hierarchy(ops, controller);
+	if (!h) {
+		ERROR("Failed to setup limits for the \"%s\" controller. "
+		      "The controller seems to be unused by \"cgfsng\" cgroup "
+		      "driver or not enabled on the cgroup hierarchy",
+		      controller);
+		errno = ENOENT;
+		free(controller);
+		return -ENOENT;
+	}
+
+	fullpath = must_make_path(h->container_full_path, filename, NULL);
+	ret = lxc_read_from_file(fullpath, value, len);
+	free(fullpath);
+	free(controller);
+	return ret;
+}
+/* isulad: write @value to cgroup file @filename of the container, retrying failed writes; returns the last write result or a negative error. */
+static int isulad_cg_legacy_set_data(struct cgroup_ops *ops, const char *filename,
+ const char *value)
+{
+ size_t len;
+ char *fullpath, *p;
+ /* "b|c <2^64-1>:<2^64-1> r|w|m" = 47 chars max */
+ char converted_value[50];
+ struct hierarchy *h;
+ int ret = 0;
+ char *controller = NULL;
+ int retry_count = 0;
+ int max_retry = 10;
+ char *container_cgroup = ops->container_cgroup;
+ /* controller = @filename up to (not including) the first '.' */
+ len = strlen(filename);
+ controller = alloca(len + 1);
+ (void)strlcpy(controller, filename, len + 1);
+
+ p = strchr(controller, '.');
+ if (p)
+ *p = '\0';
+ /* a devices.allow value that starts with '/' is run through convert_devpath() first */
+ if (strcmp("devices.allow", filename) == 0 && value[0] == '/') {
+ ret = convert_devpath(value, converted_value);
+ if (ret < 0)
+ return ret;
+ value = converted_value;
+ }
+
+ h = get_hierarchy(ops, controller);
+ if (!h) {
+ ERROR("Failed to setup limits for the \"%s\" controller. "
+ "The controller seems to be unused by \"cgfsng\" cgroup "
+ "driver or not enabled on the cgroup hierarchy",
+ controller);
+ errno = ENOENT;
+ return -ENOENT;
+ }
+
+ fullpath = must_make_path(h->container_full_path, filename, NULL);
+ /* a failed write is retried up to max_retry times, 100ms apart; only the final failure is reported on ops->errfd */
+retry:
+ ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666);
+ if (ret != 0) {
+ if (retry_count < max_retry) {
+ SYSERROR("setting cgroup config for ready process caused \"failed to write %s to %s\".", value, fullpath);
+ (void)isulad_cg_legacy_handle_cpuset_hierarchy(h, container_cgroup);
+ (void)isulad_mkdir_eexist_on_last(h->container_full_path, 0755);
+ usleep(100 * 1000); /* 100 millisecond */
+ retry_count++;
+ goto retry;
+ }
+ lxc_write_error_message(ops->errfd,
+ "%s:%d: setting cgroup config for ready process caused \"failed to write %s to %s: %s\".",
+ __FILE__, __LINE__, value, fullpath, strerror(errno));
+ }
+ free(fullpath);
+ return ret;
+}
+/* isulad: apply the legacy cgroup settings of @conf; @do_devices selects the devices.* entries or all the others. */
+__cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops,
+						    struct lxc_conf *conf,
+						    bool do_devices)
+{
+	__do_free struct lxc_list *sorted_cgroup_settings = NULL;
+	struct lxc_list *cgroup_settings;
+	struct lxc_list *iterator, *next;
+	struct lxc_cgroup *cg;
+	bool ret = false;
+	char value[21 + 1] = { 0 };
+	long long int readvalue, setvalue;
+
+	if (!ops)
+		return ret_set_errno(false, ENOENT);
+
+	if (!conf)
+		return ret_set_errno(false, EINVAL);
+	/* conf is only known to be non-NULL from here on, so take the member address now */
+	cgroup_settings = &conf->cgroup;
+	if (lxc_list_empty(cgroup_settings))
+		return true;
+
+	if (!ops->hierarchies)
+		return ret_set_errno(false, EINVAL);
+
+	sorted_cgroup_settings = sort_cgroup_settings(cgroup_settings);
+	if (!sorted_cgroup_settings)
+		return false;
+
+	lxc_list_for_each(iterator, sorted_cgroup_settings) {
+		cg = iterator->elem;
+		/* write the entry only if it belongs to the pass selected by do_devices */
+		if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
+			if (isulad_cg_legacy_set_data(ops, cg->subsystem, cg->value)) {
+				if (do_devices && (errno == EACCES || errno == EPERM)) {
+					SYSWARN("Failed to set \"%s\" to \"%s\"", cg->subsystem, cg->value);
+					continue;
+				}
+				SYSERROR("Failed to set \"%s\" to \"%s\"", cg->subsystem, cg->value);
+				goto out;
+			}
+			DEBUG("Set controller \"%s\" set to \"%s\"", cg->subsystem, cg->value);
+		}
+		// isulad: check cpu shares: read the value back and fail if it differs from the requested one
+		if (strcmp(cg->subsystem, "cpu.shares") == 0) {
+			if (isulad_cg_legacy_get_data(ops, cg->subsystem, value, sizeof(value) - 1) < 0) {
+				SYSERROR("Error get %s", cg->subsystem);
+				goto out;
+			}
+			trim(value);
+			if (lxc_safe_long_long(cg->value, &setvalue) != 0) {
+				SYSERROR("Invalid value %s", cg->value);
+				goto out;
+			}
+			if (lxc_safe_long_long(value, &readvalue) != 0) {
+				SYSERROR("Invalid value %s", value);
+				goto out;
+			}
+			if (setvalue > readvalue) {
+				ERROR("The maximum allowed cpu-shares is %s", value);
+				lxc_write_error_message(ops->errfd,
+					"%s:%d: setting cgroup config for ready process caused \"The maximum allowed cpu-shares is %s\".",
+					__FILE__, __LINE__, value);
+				goto out;
+			} else if (setvalue < readvalue) {
+				ERROR("The minimum allowed cpu-shares is %s", value);
+				lxc_write_error_message(ops->errfd,
+					"%s:%d: setting cgroup config for ready process caused \"The minimum allowed cpu-shares is %s\".",
+					__FILE__, __LINE__, value);
+				goto out;
+			}
+		}
+	}
+
+	ret = true;
+	INFO("Limits for the legacy cgroup hierarchies have been setup");
+out:
+	lxc_list_for_each_safe(iterator, sorted_cgroup_settings, next) {
+		lxc_list_del(iterator);
+		free(iterator);
+	}
+
+	return ret;
+}
+#else
__cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops,
struct lxc_conf *conf,
bool do_devices)
@@ -2739,6 +3485,7 @@ out:
return ret;
}
+#endif
/*
* Some of the parsing logic comes from the original cgroup device v1
@@ -2950,6 +3697,12 @@ bool __cgfsng_delegate_controllers(struct cgroup_ops *ops, const char *cgroup)
return true;
}
+#ifdef HAVE_ISULAD
+__cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops)
+{
+ return true; /* isulad: no-op; unlike the non-isulad variant, nothing is delegated to ops->monitor_cgroup */
+}
+#else
__cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops)
{
if (!ops)
@@ -2957,6 +3710,7 @@ __cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops)
return __cgfsng_delegate_controllers(ops, ops->monitor_cgroup);
}
+#endif
__cgfsng_ops bool cgfsng_payload_delegate_controllers(struct cgroup_ops *ops)
{
@@ -3107,6 +3861,22 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg
trim(base_cgroup);
prune_init_scope(base_cgroup);
+#ifdef HAVE_ISULAD
+ /* isulad: do not test writeability. If isulad runs inside docker without a cgroup namespace,
+ * base_cgroup will be docker/XXX..., and mountpoint + base_cgroup may not exist. */
+
+ /*
+ * Reason: when cg_hybrid_init reads /proc/1/cgroup on the host, base_cgroup
+ * may start with /system.slice, and cgroup init would then place every
+ * container cgroup under /sys/fs/cgroup/<controller>/system.slice/xxx/lxc.
+ * This is not consistent with docker. The default cgroup path should be
+ * under the /sys/fs/cgroup/<controller>/lxc directory.
+ */
+
+ if (strlen(base_cgroup) > 1 && base_cgroup[0] == '/') {
+ base_cgroup[1] = '\0';
+ }
+#else
if (type == CGROUP2_SUPER_MAGIC)
writeable = test_writeable_v2(mountpoint, base_cgroup);
else
@@ -3115,7 +3885,7 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg
TRACE("The %s group is not writeable", base_cgroup);
continue;
}
-
+#endif
if (type == CGROUP2_SUPER_MAGIC) {
char *cgv2_ctrl_path;
@@ -3268,7 +4038,45 @@ static int cg_init(struct cgroup_ops *ops, struct lxc_conf *conf)
return cg_hybrid_init(ops, relative, !lxc_list_empty(&conf->id_map));
}
-__cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops)
+#ifdef HAVE_ISULAD
+/*
+ * isulad: choose ops->container_cgroup for the container described by @conf:
+ * conf->cgroup_meta.dir + "/" + conf->name if a directory is configured, else
+ * ops->cgroup_pattern run through lxc_string_replace("%n", conf->name, ...),
+ * else just conf->name. Returns 0 on success and -1 on failure.
+ */
+__cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops, struct lxc_conf *conf)
+{
+	const char *pattern;
+	__do_free char *container_cgroup = NULL, *expanded = NULL;
+	size_t len;
+
+	if (!ops)
+		return ret_set_errno(-1, ENOENT);
+
+	/* copy system-wide cgroup information */
+	pattern = lxc_global_config_value("lxc.cgroup.pattern");
+	if (pattern && strcmp(pattern, "") != 0)
+		ops->cgroup_pattern = must_copy_string(pattern);
+
+	if (conf->cgroup_meta.dir) {
+		container_cgroup = must_concat(&len, conf->cgroup_meta.dir, "/", conf->name, NULL);
+	} else if (ops->cgroup_pattern) {
+		expanded = lxc_string_replace("%n", conf->name, ops->cgroup_pattern);
+		if (!expanded)
+			return ret_set_errno(-1, ENOMEM);
+		container_cgroup = must_concat(&len, expanded, NULL);
+	} else {
+		container_cgroup = must_concat(&len, conf->name, NULL);
+	}
+	if (!container_cgroup)
+		return ret_set_errno(-1, ENOMEM);
+
+	ops->container_cgroup = move_ptr(container_cgroup);
+	return 0;
+}
+#else
+__cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops, struct lxc_conf *conf)
{
const char *cgroup_pattern;
@@ -3282,6 +4090,7 @@ __cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops)
return 0;
}
+#endif
struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
{
@@ -3311,6 +4120,9 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
cgfsng_ops->num_hierarchies = cgfsng_num_hierarchies;
cgfsng_ops->get_hierarchies = cgfsng_get_hierarchies;
cgfsng_ops->get_cgroup = cgfsng_get_cgroup;
+#ifdef HAVE_ISULAD
+ cgfsng_ops->get_cgroup_full_path = cgfsng_get_cgroup_full_path;
+#endif
cgfsng_ops->get = cgfsng_get;
cgfsng_ops->set = cgfsng_set;
cgfsng_ops->freeze = cgfsng_freeze;
diff --git a/src/lxc/cgroups/cgroup.c b/src/lxc/cgroups/cgroup.c
index 37fd0e3..ad46d5c 100644
--- a/src/lxc/cgroups/cgroup.c
+++ b/src/lxc/cgroups/cgroup.c
@@ -31,7 +31,7 @@ struct cgroup_ops *cgroup_init(struct lxc_conf *conf)
if (!cgroup_ops)
return log_error_errno(NULL, errno, "Failed to initialize cgroup driver");
- if (cgroup_ops->data_init(cgroup_ops)) {
+ if (cgroup_ops->data_init(cgroup_ops, conf)) {
cgroup_exit(cgroup_ops);
return log_error_errno(NULL, errno,
"Failed to initialize cgroup data");
@@ -79,7 +79,7 @@ void cgroup_exit(struct cgroup_ops *ops)
free((*it)->container_base_path);
free((*it)->container_full_path);
free((*it)->monitor_full_path);
- if ((*it)->cgfd_mon >= 0)
+ if ((*it)->cgfd_con >= 0)
close((*it)->cgfd_con);
if ((*it)->cgfd_mon >= 0)
close((*it)->cgfd_mon);
diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
index 1e08a01..dcdc76b 100644
--- a/src/lxc/cgroups/cgroup.h
+++ b/src/lxc/cgroups/cgroup.h
@@ -102,6 +102,10 @@ struct cgroup_ops {
char *container_cgroup;
char *monitor_cgroup;
+#ifdef HAVE_ISULAD
+ int errfd;
+#endif
+
/* @hierarchies
* - A NULL-terminated array of struct hierarchy, one per legacy
* hierarchy. No duplicates. First sufficient, writeable mounted
@@ -139,7 +143,7 @@ struct cgroup_ops {
*/
cgroup_layout_t cgroup_layout;
- int (*data_init)(struct cgroup_ops *ops);
+ int (*data_init)(struct cgroup_ops *ops, struct lxc_conf *conf);
void (*payload_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
void (*monitor_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
bool (*monitor_create)(struct cgroup_ops *ops, struct lxc_handler *handler);
@@ -147,6 +151,9 @@ struct cgroup_ops {
bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler *handler);
bool (*payload_enter)(struct cgroup_ops *ops, struct lxc_handler *handler);
const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller);
+#ifdef HAVE_ISULAD
+ const char *(*get_cgroup_full_path)(struct cgroup_ops *ops, const char *controller);
+#endif
bool (*escape)(const struct cgroup_ops *ops, struct lxc_conf *conf);
int (*num_hierarchies)(struct cgroup_ops *ops);
bool (*get_hierarchies)(struct cgroup_ops *ops, int n, char ***out);
diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index e3fce51..e806605 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -637,8 +637,13 @@ static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_ha
{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
+ #ifdef HAVE_ISULAD
+ { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
+ { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY|MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
+ #else
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
+ #endif
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys", "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys", "%r/sys", NULL, MS_BIND, NULL },
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
@@ -670,6 +675,13 @@ static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_ha
if (!destination)
return -1;
+#ifdef HAVE_ISULAD
+ if (mkdir_p(destination, 0755) < 0) {
+ SYSERROR("Failed to create mount target '%s'", destination);
+ return log_error(-1, "Failed to mkdir destination %s", destination);
+ }
+#endif
+
mflags = add_required_remount_flags(source, destination,
default_mounts[i].flags);
r = safe_mount(source, destination, default_mounts[i].fstype,
diff --git a/src/lxc/utils.c b/src/lxc/utils.c
index 2cf9994..160b3db 100644
--- a/src/lxc/utils.c
+++ b/src/lxc/utils.c
@@ -1755,8 +1755,13 @@ int lxc_rm_rf(const char *dirname)
struct dirent *direntp;
dir = opendir(dirname);
- if (!dir)
+ if (!dir) {
+ if (errno == ENOENT) {
+ WARN("Destroy path: \"%s\" do not exist", dirname);
+ return 0;
+ }
return log_error_errno(-1, errno, "Failed to open dir \"%s\"", dirname);
+ }
while ((direntp = readdir(dir))) {
__do_free char *pathname = NULL;
@@ -1904,3 +1909,26 @@ int fix_stdio_permissions(uid_t uid)
return fret;
}
+
+#ifdef HAVE_ISULAD
+/* isulad: format a printf-style message and write it to @errfd; does nothing when errfd <= 0 */
+void lxc_write_error_message(int errfd, const char *format, ...)
+{
+	int ret;
+	char errbuf[BUFSIZ + 1] = {0};
+	va_list argp;
+
+	if (errfd <= 0)
+		return;
+
+	va_start(argp, format);
+	ret = vsnprintf(errbuf, BUFSIZ, format, argp);
+	va_end(argp);
+	if (ret < 0 || ret >= BUFSIZ)
+		SYSERROR("Failed to call vsnprintf");
+	if (ret < 0)
+		return; /* formatting failed, errbuf holds nothing meaningful; a truncated message is still sent */
+	if (write(errfd, errbuf, strlen(errbuf)) < 0)
+		SYSERROR("Write errbuf failed");
+}
+#endif
diff --git a/src/lxc/utils.h b/src/lxc/utils.h
index 7b36133..3c30565 100644
--- a/src/lxc/utils.h
+++ b/src/lxc/utils.h
@@ -244,4 +244,8 @@ extern bool lxc_can_use_pidfd(int pidfd);
extern int fix_stdio_permissions(uid_t uid);
+#ifdef HAVE_ISULAD
+extern void lxc_write_error_message(int errfd, const char *format, ...);
+#endif
+
#endif /* __LXC_UTILS_H */
--
1.8.3.1