lxc/0005-cgroup-refact-cgroup-implemt.patch

From 41bc2b03f1c143352b025432a955e6a6dafd0e91 Mon Sep 17 00:00:00 2001
From: LiFeng <lifeng68@huawei.com>
Date: Sat, 11 Apr 2020 19:16:42 +0800
Subject: [PATCH 05/49] cgroup: refact cgroup implemt

Signed-off-by: LiFeng <lifeng68@huawei.com>
---
 src/lxc/cgroups/cgfsng.c | 816 ++++++++++++++++++++++++++++++++++++++++++++++-
 src/lxc/cgroups/cgroup.c |   4 +-
 src/lxc/cgroups/cgroup.h |   9 +-
 src/lxc/conf.c           |  12 +
 src/lxc/utils.c          |  30 +-
 src/lxc/utils.h          |   4 +
 6 files changed, 869 insertions(+), 6 deletions(-)

diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index d3595bc..881dd39 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -1045,6 +1045,13 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
 		SYSWARN("Failed to destroy cgroups");
 }

+#ifdef HAVE_ISULAD
+__cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
+						struct lxc_handler *handler)
+{
+	/* isulad: deliberately a no-op; this variant has nothing to destroy. */
+}
+#else
 __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
 						struct lxc_handler *handler)
 {
@@ -1117,6 +1124,7 @@ try_lxc_rm_rf:
 			WARN("Failed to destroy \"%s\"", h->monitor_full_path);
 	}
 }
+#endif

 static int mkdir_eexist_on_last(const char *dir, mode_t mode)
 {
@@ -1202,6 +1210,13 @@ static void cgroup_tree_leaf_remove(struct hierarchy *h, bool payload)
 		SYSWARN("Failed to rmdir(\"%s\") cgroup", full_path);
 }

+#ifdef HAVE_ISULAD
+__cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
+						      struct lxc_handler *handler)
+{
+	return true; /* isulad: no-op stub, always reports success */
+}
+#else
 __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
 						      struct lxc_handler *handler)
 {
@@ -1276,7 +1291,227 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
 	ops->monitor_cgroup = move_ptr(monitor_cgroup);
 	return log_info(true, "The monitor process uses \"%s\" as cgroup", ops->monitor_cgroup);
 }
+#endif
+
+#ifdef HAVE_ISULAD
+
+/* Seed "<path>/<file>" with the parent cgroup's value if it still reads "\n". */
+static bool isulad_copy_parent_file(char *path, char *file)
+{
+	int ret, len = 0;
+	char *value = NULL, *current = NULL, *fpath = NULL, *lastslash = NULL;
+
+	fpath = must_make_path(path, file, NULL);
+	current = read_file(fpath);
+
+	if (current == NULL) {
+		SYSERROR("Failed to read file \"%s\"", fpath);
+		free(fpath);
+		return false;
+	}
+
+	/* Anything other than a lone newline means the file is already set. */
+	if (strcmp(current, "\n") != 0) {
+		free(fpath);
+		free(current);
+		return true;
+	}
+
+	free(fpath);
+	free(current);
+
+	lastslash = strrchr(path, '/');
+	if (lastslash == NULL) {
+		ERROR("Failed to detect \"/\" in \"%s\"", path);
+		return false;
+	}
+
+	/* Temporarily cut off the last component so fpath names the parent's file. */
+	*lastslash = '\0';
+	fpath = must_make_path(path, file, NULL);
+	*lastslash = '/';
+	len = lxc_read_from_file(fpath, NULL, 0);
+	if (len <= 0)
+		goto on_error;
+
+	value = must_realloc(NULL, len + 1);
+	/* Terminate up front: value is printed with "%s" if the write fails. */
+	value[len] = '\0';
+	ret = lxc_read_from_file(fpath, value, len);
+	if (ret != len)
+		goto on_error;
+	free(fpath);
+
+	fpath = must_make_path(path, file, NULL);
+	ret = lxc_write_to_file(fpath, value, len, false, 0666);
+	if (ret < 0)
+		SYSERROR("Failed to write \"%s\" to file \"%s\"", value, fpath);
+	free(fpath);
+	free(value);
+	return ret >= 0;
+
+on_error:
+	SYSERROR("Failed to read file \"%s\"", fpath);
+	free(fpath);
+	free(value);
+	return false;
+}
+
+/*
+ * isulad: create one level of the cpuset cgroup tree at @cgpath and make sure
+ * its "cpuset.cpus" and "cpuset.mems" are populated from the parent cgroup.
+ */
+static bool build_sub_cpuset_cgroup_dir(char *cgpath)
+{
+	/* An already existing directory is fine; any other mkdir_p() error is fatal. */
+	if (mkdir_p(cgpath, 0755) < 0 && errno != EEXIST) {
+		SYSERROR("Failed to create directory \"%s\"", cgpath);
+		return false;
+	}
+
+	/* copy parent's cpu list */
+	if (!isulad_copy_parent_file(cgpath, "cpuset.cpus")) {
+		SYSERROR("Failed to copy \"cpuset.cpus\" settings");
+		return false;
+	}
+
+	/* copy parent's memory node list */
+	if (!isulad_copy_parent_file(cgpath, "cpuset.mems")) {
+		SYSERROR("Failed to copy \"cpuset.mems\" settings");
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * isulad: for hierarchies carrying the "cpuset" controller, walk @cgname one
+ * path component at a time and prepare every level below the hierarchy's base
+ * path through build_sub_cpuset_cgroup_dir(). Hierarchies without "cpuset"
+ * need nothing and report success.
+ */
+static bool isulad_cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h, char *cgname)
+{
+	char *level_path, *sep;
+	bool ok;
+
+	if (!string_in_list(h->controllers, "cpuset"))
+		return true;
+
+	/* Leading slashes are not part of any component. */
+	cgname += strspn(cgname, "/");
+
+	/* Intermediate levels: cut @cgname at each '/' in turn, then restore it. */
+	for (sep = strchr(cgname, '/'); sep != NULL; sep = strchr(sep + 1, '/')) {
+		*sep = '\0';
+		level_path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
+		ok = build_sub_cpuset_cgroup_dir(level_path);
+		free(level_path);
+		*sep = '/';
+		if (!ok)
+			return false;
+	}
+
+	/* Final level: the complete @cgname. */
+	level_path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
+	ok = build_sub_cpuset_cgroup_dir(level_path);
+	free(level_path);
+
+	return ok;
+}
+
+/*
+ * isulad: create every prefix of @dir, one path component at a time, using
+ * @mode. EEXIST is tolerated for every prefix, including the full path.
+ */
+static int isulad_mkdir_eexist_on_last(const char *dir, mode_t mode)
+{
+	const char *comp = dir;	/* start of the current component */
+	const char *end = dir;	/* one past the current component */
+
+	for (;;) {
+		char *prefix;
+
+		comp = end + strspn(end, "/");
+		end = comp + strcspn(comp, "/");
+
+		/* A failing strndup() is reported as ENOMEM. */
+		errno = ENOMEM;
+		prefix = strndup(dir, (size_t)(comp - dir));
+		if (!prefix)
+			return -1;
+
+		if (mkdir(prefix, mode) < 0 && errno != EEXIST) {
+			SYSERROR("Failed to create directory \"%s\"", prefix);
+			free(prefix);
+			return -1;
+		}
+		free(prefix);
+
+		/* An empty component means the whole of @dir has been handled. */
+		if (end == comp)
+			return 0;
+	}
+}
+
+/* isulad: create cgroup @cgname in hierarchy @h; it must not exist beforehand.
+ * On success h->cgfd_con holds the result of lxc_open_dirfd() for it and the
+ * path is recorded as h->container_full_path unless one was already set. */
+static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname, int errfd)
+{
+	__do_free char *cgpath = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
+
+	if (file_exists(cgpath)) { // it must not already exist
+		ERROR("Cgroup path \"%s\" already exist.", cgpath);
+		lxc_write_error_message(errfd, "%s:%d: Cgroup path \"%s\" already exist.",
+		                        __FILE__, __LINE__, cgpath);
+		return false;
+	}
+
+	if (!isulad_cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
+		ERROR("Failed to handle legacy cpuset controller");
+		return false;
+	}
+
+	/* Create the cgroup directory itself; already existing prefixes are fine. */
+	if (isulad_mkdir_eexist_on_last(cgpath, 0755) < 0) {
+		ERROR("Failed to create cgroup \"%s\"", cgpath);
+		return false;
+	}
+
+	h->cgfd_con = lxc_open_dirfd(cgpath);
+	if (h->cgfd_con < 0)
+		return log_error_errno(false, errno, "Failed to open %s", cgpath);
+
+	/* Hand ownership of the path to the hierarchy unless it already has one. */
+	if (!h->container_full_path)
+		h->container_full_path = move_ptr(cgpath);
+
+	return true;
+}

+/* isulad: create the container cgroup in every hierarchy; stop at the first failure. */
+__cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
+						      struct lxc_handler *handler)
+{
+	if (!ops)
+		return ret_set_errno(false, ENOENT);
+
+	if (!ops->hierarchies) /* nothing to create, as in cgfsng_payload_enter() */
+		return true;
+
+	if (!ops->container_cgroup)
+		return log_error(false, "cgfsng_create container_cgroup is invalid");
+
+	for (int i = 0; ops->hierarchies[i]; i++) {
+		/* container_full_path may still be NULL on failure; log the mountpoint. */
+		if (!create_path_for_hierarchy(ops->hierarchies[i], ops->container_cgroup, ops->errfd))
+			return log_error(false, "Failed to create cgroup under %s", ops->hierarchies[i]->mountpoint);
+	}
+
+	return true;
+}
+#else
 /*
  * Try to create the same cgroup in all hierarchies. Start with cgroup_pattern;
  * next cgroup_pattern-1, -2, ..., -999.
@@ -1356,7 +1591,15 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
 	INFO("The container process uses \"%s\" as cgroup", ops->container_cgroup);
 	return true;
 }
+#endif

+#ifdef HAVE_ISULAD
+__cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops,
+					      struct lxc_handler *handler)
+{
+	return true; /* isulad: no-op stub, always reports success */
+}
+#else
 __cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops,
 					      struct lxc_handler *handler)
 {
@@ -1408,7 +1651,58 @@ __cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops,

 	return true;
 }
+#endif
+
+#ifdef HAVE_ISULAD
+/*
+ * isulad: move the process handler->pid into the container cgroup of every
+ * hierarchy by writing its pid to "cgroup.procs". A failed write is retried
+ * up to 10 times, 100ms apart, after re-running the cpuset preparation and
+ * directory creation for that hierarchy.
+ */
+__cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops,
+					      struct lxc_handler *handler)
+{
+	const int max_retry = 10;
+	char pidstr[INTTYPE_TO_STRLEN(pid_t)];
+	int len;
+
+	if (!ops)
+		return ret_set_errno(false, ENOENT);
+
+	if (!ops->hierarchies)
+		return true;
+
+	if (!ops->container_cgroup)
+		return ret_set_errno(false, ENOENT);
+
+	if (!handler || !handler->conf)
+		return ret_set_errno(false, EINVAL);
+
+	len = snprintf(pidstr, sizeof(pidstr), "%d", handler->pid);
+
+	for (int i = 0; ops->hierarchies[i]; i++) {
+		struct hierarchy *h = ops->hierarchies[i];
+		char *procs = must_make_path(h->container_full_path, "cgroup.procs", NULL);
+
+		for (int attempt = 0; lxc_write_to_file(procs, pidstr, len, false, 0666) != 0; attempt++) {
+			/* Out of retries: give up on the whole operation. */
+			if (attempt >= max_retry) {
+				SYSERROR("Failed to enter cgroup \"%s\"", procs);
+				free(procs);
+				return false;
+			}
+			SYSERROR("Failed to enter cgroup \"%s\" with retry count:%d", procs, attempt);
+			(void)isulad_cg_legacy_handle_cpuset_hierarchy(h, ops->container_cgroup);
+			(void)isulad_mkdir_eexist_on_last(h->container_full_path, 0755);
+			usleep(100 * 1000); /* 100 millisecond */
+		}
+		free(procs);
+	}
+
+	return true;
+}
+#else
 __cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops,
 					      struct lxc_handler *handler)
 {
@@ -1440,6 +1734,7 @@ __cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops,

 	return true;
 }
+#endif

 static int fchowmodat(int dirfd, const char *path, uid_t chown_uid,
 		      gid_t chown_gid, mode_t chmod_mode)
@@ -1687,6 +1982,167 @@ static inline int cg_mount_cgroup_full(int type, struct hierarchy *h,
 	return __cg_mount_direct(type, h, controllerpath);
 }

+#ifdef HAVE_ISULAD
+__cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
+                                      struct lxc_handler *handler,
+                                      const char *root, int type)
+{
+	int i, ret;
+	char *tmpfspath = NULL;
+	bool has_cgns = false, retval = false, wants_force_mount = false;
+	char **merged = NULL;
+	/* isulad: build the cgroup mounts below <root>/sys/fs/cgroup; true on success. */
+	if ((type & LXC_AUTO_CGROUP_MASK) == 0)
+		return true;
+
+	if (type & LXC_AUTO_CGROUP_FORCE) {
+		type &= ~LXC_AUTO_CGROUP_FORCE;
+		wants_force_mount = true;
+	}
+	/* Without an explicit force flag, derive it from the CAP_SYS_ADMIN settings. */
+	if (!wants_force_mount) {
+		if (!lxc_list_empty(&handler->conf->keepcaps))
+			wants_force_mount = !in_caplist(CAP_SYS_ADMIN, &handler->conf->keepcaps);
+		else
+			wants_force_mount = in_caplist(CAP_SYS_ADMIN, &handler->conf->caps);
+	}
+
+	has_cgns = cgns_supported();
+	if (has_cgns && !wants_force_mount)
+		return true;
+
+	if (type == LXC_AUTO_CGROUP_NOSPEC)
+		type = LXC_AUTO_CGROUP_MIXED;
+	else if (type == LXC_AUTO_CGROUP_FULL_NOSPEC)
+		type = LXC_AUTO_CGROUP_FULL_MIXED;
+
+	/* Mount a tmpfs on <root>/sys/fs/cgroup to hold the per-controller mounts. */
+	tmpfspath = must_make_path(root, "/sys/fs/cgroup", NULL);
+	if (mkdir_p(tmpfspath, 0755) < 0) {
+		ERROR("Failed to create directory: %s", tmpfspath);
+		goto on_error;
+	}
+	ret = safe_mount(NULL, tmpfspath, "tmpfs",
+	                 MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
+	                 "size=10240k,mode=755", root);
+	if (ret < 0)
+		goto on_error;
+	/* NOTE(review): ops->hierarchies is dereferenced below without a NULL check. */
+	for (i = 0; ops->hierarchies[i]; i++) {
+		char *controllerpath = NULL;
+		char *path2 = NULL;
+		struct hierarchy *h = ops->hierarchies[i];
+		char *controller = strrchr(h->mountpoint, '/');
+		/* The last component of the host mountpoint names the directory to create. */
+		if (!controller)
+			continue;
+		controller++;
+
+		// isulad: remember co-mounted controllers ("a,b") so per-controller symlinks can be added below
+		if (strchr(controller, ',') != NULL) {
+			int pret;
+			pret = lxc_append_string(&merged, controller);
+			if (pret < 0)
+				goto on_error;
+		}
+
+		controllerpath = must_make_path(tmpfspath, controller, NULL);
+		if (dir_exists(controllerpath)) {
+			free(controllerpath);
+			continue;
+		}
+
+		ret = mkdir(controllerpath, 0755);
+		if (ret < 0) {
+			SYSERROR("Error creating cgroup path: %s", controllerpath);
+			free(controllerpath);
+			goto on_error;
+		}
+
+		if (has_cgns && wants_force_mount) {
+			/* If cgroup namespaces are supported but the container
+			 * will not have CAP_SYS_ADMIN after it has started we
+			 * need to mount the cgroups manually.
+			 */
+			ret = cg_mount_in_cgroup_namespace(type, h, controllerpath);
+			free(controllerpath);
+			if (ret < 0)
+				goto on_error;
+
+			continue;
+		}
+
+		ret = cg_mount_cgroup_full(type, h, controllerpath);
+		if (ret < 0) {
+			free(controllerpath);
+			goto on_error;
+		}
+
+		if (!cg_mount_needs_subdirs(type)) {
+			free(controllerpath);
+			continue;
+		}
+
+		// isulad: ignore ops->container_cgroup so we will not see directory lxc after /sys/fs/cgroup/xxx in container,
+		// isulad: ignore h->container_base_path so we will not see subgroup of /sys/fs/cgroup/xxx/subgroup in container
+		path2 = must_make_path(controllerpath, NULL);
+		ret = mkdir_p(path2, 0755);
+		if (ret < 0) {
+			free(controllerpath);
+			free(path2);
+			goto on_error;
+		}
+
+		ret = cg_legacy_mount_controllers(type, h, controllerpath,
+		                                  path2, ops->container_cgroup);
+		free(controllerpath);
+		free(path2);
+		if (ret < 0)
+			goto on_error;
+	}
+
+	// isulad: for each co-mounted directory "a,b" create symlinks "a" and "b" that point at it
+	if (merged) {
+		char **mc = NULL;
+		for (mc = merged; *mc; mc++) {
+			char *token = NULL;
+			char *copy = must_copy_string(*mc);
+			lxc_iterate_parts(token, copy, ",") {
+				int mret;
+				char *link;
+				link = must_make_path(tmpfspath, token, NULL);
+				mret = symlink(*mc, link);
+				if (mret < 0 && errno != EEXIST) {
+					SYSERROR("Failed to create link %s for target %s", link, *mc);
+					free(copy);
+					free(link);
+					goto on_error;
+				}
+				free(link);
+			}
+			free(copy);
+		}
+	}
+
+
+	// isulad: remount /sys/fs/cgroup to readonly
+	if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_RO) {
+		ret = mount(tmpfspath, tmpfspath, "bind",
+		            MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME|MS_RDONLY|MS_BIND|MS_REMOUNT, NULL);
+		if (ret < 0) {
+			SYSERROR("Failed to remount /sys/fs/cgroup.");
+			goto on_error;
+		}
+	}
+
+	retval = true;
+
+on_error:
+	free(tmpfspath);
+	lxc_free_array((void **)merged, free);
+	return retval;
+}
+#else
 __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
 				      struct lxc_handler *handler,
 				      const char *root, int type)
@@ -1799,6 +2255,7 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,

 	return true;
 }
+#endif

 /* Only root needs to escape to the cgroup of its init. */
 __cgfsng_ops static bool cgfsng_escape(const struct cgroup_ops *ops,
@@ -2054,6 +2511,24 @@ __cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
 		   : NULL;
 }

+#ifdef HAVE_ISULAD
+/* isulad: container cgroup path for @controller, built on first use and cached in the hierarchy. */
+__cgfsng_ops static const char *cgfsng_get_cgroup_full_path(struct cgroup_ops *ops,
+						  const char *controller)
+{
+	struct hierarchy *h = get_hierarchy(ops, controller);
+
+	if (!h)
+		return log_warn_errno(NULL, ENOENT, "Failed to find hierarchy for controller \"%s\"",
+				      controller ? controller : "(null)");
+
+	if (h->container_full_path)
+		return h->container_full_path;
+	h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, ops->container_cgroup, NULL);
+	return h->container_full_path;
+}
+#endif
+
 /* Given a cgroup path returned from lxc_cmd_get_cgroup_path, build a full path,
  * which must be freed by the caller.
  */
@@ -2360,6 +2835,44 @@ __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops,
 	return true;
 }

+#ifdef HAVE_ISULAD
+/* isulad: read cgroup file @filename of the container into @value (up to @len
+ * bytes); the controller is the part of @filename before the first '.'. */
+__cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename,
+                                   char *value, size_t len, const char *name,
+                                   const char *lxcpath)
+{
+	int ret;
+	size_t filename_len = strlen(filename);
+	char *controller, *dot, *cgroup, *fullpath;
+	const char *ori_path;
+	struct hierarchy *h;
+
+	controller = alloca(filename_len + 1);
+	(void)strlcpy(controller, filename, filename_len + 1);
+	dot = strchr(controller, '.');
+	if (dot)
+		*dot = '\0';
+
+	ori_path = ops->get_cgroup(ops, controller);
+	if (ori_path == NULL) {
+		ERROR("Failed to get cgroup path:%s", controller);
+		return -1;
+	}
+	cgroup = safe_strdup(ori_path);
+
+	h = get_hierarchy(ops, controller);
+	if (!h) {
+		free(cgroup);
+		return -1;
+	}
+	fullpath = build_full_cgpath_from_monitorpath(h, cgroup, filename);
+	ret = lxc_read_from_file(fullpath, value, len);
+	free(fullpath);
+	free(cgroup);
+	return ret;
+}
+#else
 /* Called externally (i.e. from 'lxc-cgroup') to query cgroup limits.  Here we
  * don't have a cgroup_data set up, so we ask the running container through the
  * commands API for the cgroup path.
@@ -2397,6 +2910,7 @@ __cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename,

 	return ret;
 }
+#endif

 static int device_cgroup_parse_access(struct device_item *device, const char *val)
 {
@@ -2510,6 +3024,44 @@ static int device_cgroup_rule_parse(struct device_item *device, const char *key,
 	return device_cgroup_parse_access(device, ++val);
 }

+#ifdef HAVE_ISULAD
+/* isulad: write @value to cgroup file @filename of the container; the
+ * controller is the part of @filename before the first '.'. */
+__cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops,
+                                   const char *filename, const char *value,
+                                   const char *name, const char *lxcpath)
+{
+	int ret;
+	size_t filename_len = strlen(filename);
+	char *controller, *dot, *cgroup, *fullpath;
+	const char *ori_path;
+	struct hierarchy *h;
+
+	controller = alloca(filename_len + 1);
+	(void)strlcpy(controller, filename, filename_len + 1);
+	dot = strchr(controller, '.');
+	if (dot)
+		*dot = '\0';
+
+	ori_path = ops->get_cgroup(ops, controller);
+	if (ori_path == NULL) {
+		ERROR("Failed to get cgroup path:%s", controller);
+		return -1;
+	}
+	cgroup = safe_strdup(ori_path);
+
+	h = get_hierarchy(ops, controller);
+	if (!h) {
+		free(cgroup);
+		return -1;
+	}
+	fullpath = build_full_cgpath_from_monitorpath(h, cgroup, filename);
+	ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666);
+	free(fullpath);
+	free(cgroup);
+	return ret;
+}
+#else
 /* Called externally (i.e. from 'lxc-cgroup') to set new cgroup limits.  Here we
  * don't have a cgroup_data set up, so we ask the running container through the
  * commands API for the cgroup path.
@@ -2562,6 +3114,7 @@ __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops,

 	return ret;
 }
+#endif

 /* take devices cgroup line
  *    /dev/foo rwx
@@ -2686,6 +3239,199 @@ static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename,
 	return lxc_write_openat(h->container_full_path, filename, value, strlen(value));
 }

+#ifdef HAVE_ISULAD
+/*
+ * isulad: read cgroup file @filename from the container's cgroup into @value,
+ * reading at most @len bytes. Called from setup_limits - here we have the
+ * container's cgroup_data because we created the cgroups.
+ * Returns what lxc_read_from_file() returns, -1 if the controller name cannot
+ * be allocated, or -ENOENT if no hierarchy carries the controller.
+ */
+static int isulad_cg_legacy_get_data(struct cgroup_ops *ops, const char *filename,
+                              char *value, size_t len)
+{
+	char *fullpath = NULL;
+	char *p = NULL;
+	struct hierarchy *h = NULL;
+	int ret = 0;
+	char *controller = NULL;
+
+	/* The controller is the part of @filename before the first '.'. */
+	controller = strdup(filename);
+	if (controller == NULL) {
+		errno = ENOMEM;
+		return -1;
+	}
+
+	p = strchr(controller, '.');
+	if (p)
+		*p = '\0';
+
+	h = get_hierarchy(ops, controller);
+	if (!h) {
+		ERROR("Failed to setup limits for the \"%s\" controller. "
+		      "The controller seems to be unused by \"cgfsng\" cgroup "
+		      "driver or not enabled on the cgroup hierarchy",
+		      controller);
+		errno = ENOENT;
+		free(controller);
+		return -ENOENT;
+	}
+
+	/* @len still holds the caller's buffer capacity and bounds the read. */
+	fullpath = must_make_path(h->container_full_path, filename, NULL);
+	ret = lxc_read_from_file(fullpath, value, len);
+	free(fullpath);
+	free(controller);
+
+	return ret;
+}
+
+/* isulad: write @value to cgroup file @filename inside the container's cgroup.
+ * A "devices.allow" value starting with '/' is first run through
+ * convert_devpath(). Failed writes are retried up to 10 times, 100ms apart;
+ * if the last attempt fails too, the error is also reported on ops->errfd. */
+static int isulad_cg_legacy_set_data(struct cgroup_ops *ops, const char *filename,
+                              const char *value)
+{
+	/* "b|c <2^64-1>:<2^64-1> r|w|m" = 47 chars max */
+	char converted_value[50];
+	char *controller, *dot, *fullpath;
+	struct hierarchy *h;
+	size_t filename_len = strlen(filename);
+	int ret = 0;
+	int attempt = 0;
+	const int max_retry = 10;
+
+	controller = alloca(filename_len + 1);
+	(void)strlcpy(controller, filename, filename_len + 1);
+	dot = strchr(controller, '.');
+	if (dot)
+		*dot = '\0';
+
+	/* Device paths are converted before being written. */
+	if (strcmp("devices.allow", filename) == 0 && value[0] == '/') {
+		ret = convert_devpath(value, converted_value);
+		if (ret < 0)
+			return ret;
+		value = converted_value;
+	}
+
+	h = get_hierarchy(ops, controller);
+	if (!h) {
+		ERROR("Failed to setup limits for the \"%s\" controller. "
+		      "The controller seems to be unused by \"cgfsng\" cgroup "
+		      "driver or not enabled on the cgroup hierarchy",
+		      controller);
+		errno = ENOENT;
+		return -ENOENT;
+	}
+
+	fullpath = must_make_path(h->container_full_path, filename, NULL);
+
+	/* Each failed attempt re-runs the cgroup directory setup and waits 100ms. */
+	while ((ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666)) != 0 && attempt < max_retry) {
+		SYSERROR("setting cgroup config for ready process caused \"failed to write %s to %s\".", value, fullpath);
+		(void)isulad_cg_legacy_handle_cpuset_hierarchy(h, ops->container_cgroup);
+		(void)isulad_mkdir_eexist_on_last(h->container_full_path, 0755);
+		usleep(100 * 1000); /* 100 millisecond */
+		attempt++;
+	}
+
+	if (ret != 0)
+		lxc_write_error_message(ops->errfd,
+		                        "%s:%d: setting cgroup config for ready process caused \"failed to write %s to %s: %s\".",
+		                        __FILE__, __LINE__, value, fullpath, strerror(errno));
+
+	free(fullpath);
+	return ret;
+}
+
+__cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops,
+						    struct lxc_conf *conf,
+						    bool do_devices)
+{
+	__do_free struct lxc_list *sorted_cgroup_settings = NULL;
+	struct lxc_list *cgroup_settings = &conf->cgroup;
+	struct lxc_list *iterator, *next;
+	struct lxc_cgroup *cg;
+	bool ret = false;
+	char value[21 + 1] = { 0 };
+	long long int readvalue, setvalue;
+	/* isulad: apply conf->cgroup; do_devices picks the "devices*" keys, otherwise all other keys. */
+	if (!ops)
+		return ret_set_errno(false, ENOENT);
+
+	if (!conf)
+		return ret_set_errno(false, EINVAL);
+
+	cgroup_settings = &conf->cgroup;
+	if (lxc_list_empty(cgroup_settings))
+		return true;
+
+	if (!ops->hierarchies)
+		return ret_set_errno(false, EINVAL);
+
+	sorted_cgroup_settings = sort_cgroup_settings(cgroup_settings);
+	if (!sorted_cgroup_settings)
+		return false;
+
+	lxc_list_for_each(iterator, sorted_cgroup_settings) {
+		cg = iterator->elem;
+
+		if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
+			if (isulad_cg_legacy_set_data(ops, cg->subsystem, cg->value)) {
+				if (do_devices && (errno == EACCES || errno == EPERM)) {
+					SYSWARN("Failed to set \"%s\" to \"%s\"", cg->subsystem, cg->value);
+					continue;
+				}
+				SYSERROR("Failed to set \"%s\" to \"%s\"", cg->subsystem, cg->value);
+				goto out;
+			}
+			DEBUG("Set controller \"%s\" set to \"%s\"", cg->subsystem, cg->value);
+		}
+		// isulad: read cpu.shares back and fail if it differs from the requested value (not filtered by do_devices)
+		if (strcmp(cg->subsystem, "cpu.shares") == 0) {
+			if (isulad_cg_legacy_get_data(ops, cg->subsystem, value, sizeof(value) - 1) < 0) {
+				SYSERROR("Error get %s", cg->subsystem);
+				goto out;
+			}
+			trim(value);
+			if (lxc_safe_long_long(cg->value, &setvalue) != 0) {
+				SYSERROR("Invalid value %s", cg->value);
+				goto out;
+			}
+			if (lxc_safe_long_long(value, &readvalue) != 0) {
+				SYSERROR("Invalid value %s", value);
+				goto out;
+			}
+			if (setvalue > readvalue) {
+				ERROR("The maximum allowed cpu-shares is %s", value);
+				lxc_write_error_message(ops->errfd,
+				                        "%s:%d: setting cgroup config for ready process caused \"The maximum allowed cpu-shares is %s\".",
+				                        __FILE__, __LINE__, value);
+				goto out;
+			} else if (setvalue < readvalue) {
+				ERROR("The minimum allowed cpu-shares is %s", value);
+				lxc_write_error_message(ops->errfd,
+				                        "%s:%d: setting cgroup config for ready process caused \"The minimum allowed cpu-shares is %s\".",
+				                        __FILE__, __LINE__, value);
+				goto out;
+			}
+		}
+	}
+
+	ret = true;
+	INFO("Limits for the legacy cgroup hierarchies have been setup");
+out:
+	lxc_list_for_each_safe(iterator, sorted_cgroup_settings, next) {
+		lxc_list_del(iterator);
+		free(iterator);
+	}
+
+	return ret;
+}
+#else
 __cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops,
 						    struct lxc_conf *conf,
 						    bool do_devices)
@@ -2739,6 +3485,7 @@ out:

 	return ret;
 }
+#endif

 /*
  * Some of the parsing logic comes from the original cgroup device v1
@@ -2950,6 +3697,12 @@ bool __cgfsng_delegate_controllers(struct cgroup_ops *ops, const char *cgroup)
 	return true;
 }

+#ifdef HAVE_ISULAD
+__cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops)
+{
+	return true; /* isulad: no-op stub, always reports success */
+}
+#else
 __cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops)
 {
 	if (!ops)
@@ -2957,6 +3710,7 @@ __cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops)

 	return __cgfsng_delegate_controllers(ops, ops->monitor_cgroup);
 }
+#endif

 __cgfsng_ops bool cgfsng_payload_delegate_controllers(struct cgroup_ops *ops)
 {
@@ -3107,6 +3861,22 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg

 		trim(base_cgroup);
 		prune_init_scope(base_cgroup);
+#ifdef HAVE_ISULAD
+		/* isulad: do not test writeable, if we run isulad in docker without cgroup namespace.
+		 * the base_cgroup will be docker/XXX.., mountpoint+base_cgroup may be not exist */
+
+		/*
+		 * reason:base cgroup may be started with /system.slice when cg_hybrid_init
+		 *	read /proc/1/cgroup on host, and cgroup init will set all containers
+		 *	cgroup path under /sys/fs/cgroup/<controller>/system.slice/xxx/lxc
+		 *	directory, this is not consistent with docker. The default cgroup path
+		 *	should be under /sys/fs/cgroup/<controller>/lxc directory.
+		 */
+
+		if (strlen(base_cgroup) > 1 && base_cgroup[0] == '/') {
+			base_cgroup[1] = '\0';
+		}
+#else
 		if (type == CGROUP2_SUPER_MAGIC)
 			writeable = test_writeable_v2(mountpoint, base_cgroup);
 		else
@@ -3115,7 +3885,7 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg
 			TRACE("The %s group is not writeable", base_cgroup);
 			continue;
 		}
-
+#endif
 		if (type == CGROUP2_SUPER_MAGIC) {
 			char *cgv2_ctrl_path;

@@ -3268,7 +4038,45 @@ static int cg_init(struct cgroup_ops *ops, struct lxc_conf *conf)
 	return cg_hybrid_init(ops, relative, !lxc_list_empty(&conf->id_map));
 }

-__cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops)
+#ifdef HAVE_ISULAD
+/*
+ * isulad: set up per-container cgroup data. ops->cgroup_pattern is copied from
+ * the global "lxc.cgroup.pattern" value when non-empty. ops->container_cgroup
+ * becomes "<conf->cgroup_meta.dir>/<conf->name>" if a dir is configured, else
+ * the pattern with "%n" replaced by conf->name, else conf->name itself.
+ */
+__cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops, struct lxc_conf *conf)
+{
+	__do_free char *container_cgroup = NULL, *expanded = NULL;
+	const char *pattern;
+	size_t len;
+
+	if (!ops)
+		return ret_set_errno(-1, ENOENT);
+
+	/* copy system-wide cgroup information */
+	pattern = lxc_global_config_value("lxc.cgroup.pattern");
+	if (pattern && strcmp(pattern, "") != 0)
+		ops->cgroup_pattern = must_copy_string(pattern);
+
+	if (conf->cgroup_meta.dir) {
+		container_cgroup = must_concat(&len, conf->cgroup_meta.dir, "/", conf->name, NULL);
+	} else if (ops->cgroup_pattern) {
+		expanded = lxc_string_replace("%n", conf->name, ops->cgroup_pattern);
+		if (!expanded)
+			return ret_set_errno(-1, ENOMEM);
+		container_cgroup = must_concat(&len, expanded, NULL);
+	} else {
+		container_cgroup = must_concat(&len, conf->name, NULL);
+	}
+	if (!container_cgroup)
+		return ret_set_errno(-1, ENOMEM);
+
+	ops->container_cgroup = move_ptr(container_cgroup);
+	return 0;
+}
+#else
+__cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops, struct lxc_conf *conf)
 {
 	const char *cgroup_pattern;

@@ -3282,6 +4090,7 @@ __cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops)

 	return 0;
 }
+#endif

 struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
 {
@@ -3311,6 +4120,9 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
 	cgfsng_ops->num_hierarchies = cgfsng_num_hierarchies;
 	cgfsng_ops->get_hierarchies = cgfsng_get_hierarchies;
 	cgfsng_ops->get_cgroup = cgfsng_get_cgroup;
+#ifdef HAVE_ISULAD
+	cgfsng_ops->get_cgroup_full_path = cgfsng_get_cgroup_full_path;
+#endif
 	cgfsng_ops->get = cgfsng_get;
 	cgfsng_ops->set = cgfsng_set;
 	cgfsng_ops->freeze = cgfsng_freeze;
diff --git a/src/lxc/cgroups/cgroup.c b/src/lxc/cgroups/cgroup.c
index 37fd0e3..ad46d5c 100644
--- a/src/lxc/cgroups/cgroup.c
+++ b/src/lxc/cgroups/cgroup.c
@@ -31,7 +31,7 @@ struct cgroup_ops *cgroup_init(struct lxc_conf *conf)
 	if (!cgroup_ops)
 		return log_error_errno(NULL, errno, "Failed to initialize cgroup driver");

-	if (cgroup_ops->data_init(cgroup_ops)) {
+	if (cgroup_ops->data_init(cgroup_ops, conf)) {
 		cgroup_exit(cgroup_ops);
 		return log_error_errno(NULL, errno,
 				       "Failed to initialize cgroup data");
@@ -79,7 +79,7 @@ void cgroup_exit(struct cgroup_ops *ops)
 		free((*it)->container_base_path);
 		free((*it)->container_full_path);
 		free((*it)->monitor_full_path);
-		if ((*it)->cgfd_mon >= 0)
+		if ((*it)->cgfd_con >= 0)
 			close((*it)->cgfd_con);
 		if ((*it)->cgfd_mon >= 0)
 			close((*it)->cgfd_mon);
diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
index 1e08a01..dcdc76b 100644
--- a/src/lxc/cgroups/cgroup.h
+++ b/src/lxc/cgroups/cgroup.h
@@ -102,6 +102,10 @@ struct cgroup_ops {
 	char *container_cgroup;
 	char *monitor_cgroup;

+#ifdef HAVE_ISULAD
+	int errfd;
+#endif
+
 	/* @hierarchies
 	 * - A NULL-terminated array of struct hierarchy, one per legacy
 	 *   hierarchy. No duplicates. First sufficient, writeable mounted
@@ -139,7 +143,7 @@ struct cgroup_ops {
 	 */
 	cgroup_layout_t cgroup_layout;

-	int (*data_init)(struct cgroup_ops *ops);
+	int (*data_init)(struct cgroup_ops *ops, struct lxc_conf *conf);
 	void (*payload_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
 	void (*monitor_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
 	bool (*monitor_create)(struct cgroup_ops *ops, struct lxc_handler *handler);
@@ -147,6 +151,9 @@ struct cgroup_ops {
 	bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler *handler);
 	bool (*payload_enter)(struct cgroup_ops *ops, struct lxc_handler *handler);
 	const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller);
+#ifdef HAVE_ISULAD
+	const char *(*get_cgroup_full_path)(struct cgroup_ops *ops, const char *controller);
+#endif
 	bool (*escape)(const struct cgroup_ops *ops, struct lxc_conf *conf);
 	int (*num_hierarchies)(struct cgroup_ops *ops);
 	bool (*get_hierarchies)(struct cgroup_ops *ops, int n, char ***out);
diff --git a/src/lxc/conf.c b/src/lxc/conf.c
index e3fce51..e806605 100644
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -637,8 +637,13 @@ static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_ha
 		{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger",                          "%r/proc/sysrq-trigger",      NULL,    MS_BIND,                                         NULL },
 		{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL,                                             "%r/proc/sysrq-trigger",      NULL,    MS_REMOUNT|MS_BIND|MS_RDONLY,                    NULL },
 		{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW,    "proc",                                           "%r/proc",                    "proc",  MS_NODEV|MS_NOEXEC|MS_NOSUID,                    NULL },
+		#ifdef HAVE_ISULAD
+		{ LXC_AUTO_SYS_MASK,  LXC_AUTO_SYS_RW,     "sysfs",                                          "%r/sys",                     "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID,                    NULL },
+		{ LXC_AUTO_SYS_MASK,  LXC_AUTO_SYS_RO,     "sysfs",                                          "%r/sys",                     "sysfs", MS_RDONLY|MS_NODEV|MS_NOEXEC|MS_NOSUID,          NULL },
+		#else
 		{ LXC_AUTO_SYS_MASK,  LXC_AUTO_SYS_RW,     "sysfs",                                          "%r/sys",                     "sysfs", 0,                                               NULL },
 		{ LXC_AUTO_SYS_MASK,  LXC_AUTO_SYS_RO,     "sysfs",                                          "%r/sys",                     "sysfs", MS_RDONLY,                                       NULL },
+		#endif
 		{ LXC_AUTO_SYS_MASK,  LXC_AUTO_SYS_MIXED,  "sysfs",                                          "%r/sys",                     "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID,                    NULL },
 		{ LXC_AUTO_SYS_MASK,  LXC_AUTO_SYS_MIXED,  "%r/sys",                                         "%r/sys",                     NULL,    MS_BIND,                                         NULL },
 		{ LXC_AUTO_SYS_MASK,  LXC_AUTO_SYS_MIXED,  NULL,                                             "%r/sys",                     NULL,    MS_REMOUNT|MS_BIND|MS_RDONLY,                    NULL },
@@ -670,6 +675,13 @@ static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_ha
 		if (!destination)
 			return -1;

+#ifdef HAVE_ISULAD
+		if (mkdir_p(destination, 0755) < 0) {
+			SYSERROR("Failed to create mount target '%s'", destination);
+			return log_error(-1, "Failed to mkdir destination %s", destination);
+		}
+#endif
+
 		mflags = add_required_remount_flags(source, destination,
 						    default_mounts[i].flags);
 		r = safe_mount(source, destination, default_mounts[i].fstype,
diff --git a/src/lxc/utils.c b/src/lxc/utils.c
index 2cf9994..160b3db 100644
--- a/src/lxc/utils.c
+++ b/src/lxc/utils.c
@@ -1755,8 +1755,13 @@ int lxc_rm_rf(const char *dirname)
 	struct dirent *direntp;

 	dir = opendir(dirname);
-	if (!dir)
+	if (!dir) {
+		if (errno == ENOENT) {
+			WARN("Destroy path: \"%s\" do not exist", dirname);
+			return 0;
+		}
 		return log_error_errno(-1, errno, "Failed to open dir \"%s\"", dirname);
+	}

 	while ((direntp = readdir(dir))) {
 		__do_free char *pathname = NULL;
@@ -1904,3 +1909,26 @@ int fix_stdio_permissions(uid_t uid)

 	return fret;
 }
+
+#ifdef HAVE_ISULAD
+/* isulad: format an error message (at most BUFSIZ - 1 bytes) and write it to
+ * @errfd. Nothing is written when @errfd is not a positive descriptor. */
+void lxc_write_error_message(int errfd, const char *format, ...)
+{
+	char errbuf[BUFSIZ + 1] = {0};
+	va_list argp;
+	int written;
+
+	if (errfd <= 0)
+		return;
+
+	va_start(argp, format);
+	written = vsnprintf(errbuf, BUFSIZ, format, argp);
+	va_end(argp);
+	if (written < 0 || written >= BUFSIZ)
+		SYSERROR("Failed to call vsnprintf");
+
+	if (write(errfd, errbuf, strlen(errbuf)) < 0)
+		SYSERROR("Write errbuf failed");
+}
+#endif
diff --git a/src/lxc/utils.h b/src/lxc/utils.h
index 7b36133..3c30565 100644
--- a/src/lxc/utils.h
+++ b/src/lxc/utils.h
@@ -244,4 +244,8 @@ extern bool lxc_can_use_pidfd(int pidfd);

 extern int fix_stdio_permissions(uid_t uid);

+#ifdef HAVE_ISULAD
+extern void lxc_write_error_message(int errfd, const char *format, ...);
+#endif
+
 #endif /* __LXC_UTILS_H */
--
1.8.3.1