From 8a62b519510080bb361cdd058d0e7a5edd955a95 Mon Sep 17 00:00:00 2001 From: lifeng68 Date: Wed, 15 Jul 2020 09:32:32 +0800 Subject: [PATCH] huawei: adapt to huawei 4.0.3 Signed-off-by: lifeng68 --- CODING_STYLE.md | 13 +- .../apparmor/abstractions/start-container.in | 2 - config/init/common/lxc-net.in | 2 +- config/templates/common.conf.in | 31 - config/templates/userns.conf.in | 8 - config/yum/lxc-patch.py | 1 + configure.ac | 59 +- doc/ja/lxc.container.conf.sgml.in | 20 +- doc/lxc.container.conf.sgml.in | 18 +- hooks/Makefile.am | 3 + src/include/fexecve.c | 6 +- src/include/openpty.c | 28 +- src/include/openpty.h | 10 +- src/lxc/Makefile.am | 47 +- src/lxc/af_unix.c | 37 +- src/lxc/af_unix.h | 34 +- src/lxc/api_extensions.h | 1 - src/lxc/attach.c | 543 +++- src/lxc/attach.h | 6 + src/lxc/attach_options.h | 21 +- src/lxc/cgroups/cgfsng.c | 1363 +++++++-- src/lxc/cgroups/cgroup.c | 2 +- src/lxc/cgroups/cgroup.h | 27 +- src/lxc/cgroups/cgroup2_devices.c | 41 +- src/lxc/cmd/lxc-update-config.in | 2 +- src/lxc/cmd/lxc_init.c | 2 +- src/lxc/cmd/lxc_monitord.c | 2 +- src/lxc/cmd/lxc_user_nic.c | 60 +- src/lxc/cmd/lxc_usernsexec.c | 10 +- src/lxc/commands.c | 293 +- src/lxc/commands.h | 18 +- src/lxc/commands_utils.c | 5 +- src/lxc/compiler.h | 18 - src/lxc/conf.c | 2517 ++++++++++++++--- src/lxc/conf.h | 111 +- src/lxc/confile.c | 645 ++++- src/lxc/confile.h | 11 +- src/lxc/confile_utils.c | 12 - src/lxc/confile_utils.h | 10 +- src/lxc/criu.c | 27 +- src/lxc/exec_commands.c | 416 +++ src/lxc/exec_commands.h | 73 + src/lxc/execute.c | 23 +- src/lxc/file_utils.h | 49 +- src/lxc/initutils.c | 4 + src/lxc/isulad_utils.c | 99 + src/lxc/isulad_utils.h | 20 + src/lxc/json/defs.c | 205 ++ src/lxc/json/defs.h | 37 + src/lxc/json/json_common.c | 1153 ++++++++ src/lxc/json/json_common.h | 185 ++ src/lxc/json/logger_json_file.c | 246 ++ src/lxc/json/logger_json_file.h | 45 + src/lxc/json/oci_runtime_hooks.c | 52 + src/lxc/json/oci_runtime_hooks.h | 15 + 
src/lxc/json/oci_runtime_spec.c | 195 ++ src/lxc/json/oci_runtime_spec.h | 37 + src/lxc/json/read-file.c | 95 + src/lxc/json/read-file.h | 11 + src/lxc/log.c | 72 +- src/lxc/log.h | 23 +- src/lxc/lsm/apparmor.c | 12 +- src/lxc/lxc.h | 20 +- src/lxc/lxccontainer.c | 674 ++++- src/lxc/lxccontainer.h | 129 +- src/lxc/lxclock.c | 27 + src/lxc/lxclock.h | 4 + src/lxc/macro.h | 17 - src/lxc/mainloop.c | 15 +- src/lxc/mainloop.h | 4 - src/lxc/memory_utils.h | 8 +- src/lxc/namespace.c | 27 + src/lxc/namespace.h | 90 + src/lxc/network.c | 105 +- src/lxc/network.h | 4 +- src/lxc/path.c | 655 +++++ src/lxc/path.h | 65 + src/lxc/process_utils.h | 290 -- src/lxc/{process_utils.c => raw_syscalls.c} | 71 +- src/lxc/raw_syscalls.h | 94 + src/lxc/rexec.c | 14 +- src/lxc/seccomp.c | 582 +++- src/lxc/start.c | 1052 ++++++- src/lxc/start.h | 43 +- src/lxc/storage/block.c | 86 + src/lxc/storage/block.h | 41 + src/lxc/storage/btrfs.c | 11 + src/lxc/storage/dir.c | 36 +- src/lxc/storage/loop.c | 36 +- src/lxc/storage/overlay.c | 8 + src/lxc/storage/rsync.c | 8 +- src/lxc/storage/storage.c | 29 +- src/lxc/storage/storage_utils.c | 56 +- src/lxc/storage/zfs.c | 15 +- src/lxc/string_utils.c | 1 + src/lxc/sync.h | 4 + src/lxc/syscall_numbers.h | 112 +- src/lxc/syscall_wrappers.h | 24 - src/lxc/terminal.c | 1113 +++++++- src/lxc/terminal.h | 67 +- src/lxc/tools/arguments.h | 23 + src/lxc/tools/lxc_attach.c | 399 ++- src/lxc/tools/lxc_ls.c | 16 +- src/lxc/tools/lxc_start.c | 95 + src/lxc/utils.c | 297 +- src/lxc/utils.h | 95 +- src/lxc/uuid.c | 2 +- src/tests/Makefile.am | 10 +- src/tests/attach.c | 9 + src/tests/console.c | 24 +- src/tests/containertests.c | 2 +- src/tests/lxc-test-no-new-privs | 6 +- src/tests/lxc-test-usernsexec | 368 --- src/tests/lxc_raw_clone.c | 2 +- templates/lxc-oci.in | 3 +- 115 files changed, 13657 insertions(+), 2464 deletions(-) create mode 100644 src/lxc/exec_commands.c create mode 100644 src/lxc/exec_commands.h create mode 100644 src/lxc/isulad_utils.c create 
mode 100644 src/lxc/isulad_utils.h create mode 100644 src/lxc/json/defs.c create mode 100644 src/lxc/json/defs.h create mode 100755 src/lxc/json/json_common.c create mode 100755 src/lxc/json/json_common.h create mode 100644 src/lxc/json/logger_json_file.c create mode 100644 src/lxc/json/logger_json_file.h create mode 100644 src/lxc/json/oci_runtime_hooks.c create mode 100644 src/lxc/json/oci_runtime_hooks.h create mode 100644 src/lxc/json/oci_runtime_spec.c create mode 100644 src/lxc/json/oci_runtime_spec.h create mode 100644 src/lxc/json/read-file.c create mode 100644 src/lxc/json/read-file.h create mode 100644 src/lxc/path.c create mode 100644 src/lxc/path.h delete mode 100644 src/lxc/process_utils.h rename src/lxc/{process_utils.c => raw_syscalls.c} (68%) create mode 100644 src/lxc/raw_syscalls.h create mode 100644 src/lxc/storage/block.c create mode 100644 src/lxc/storage/block.h delete mode 100755 src/tests/lxc-test-usernsexec diff --git a/CODING_STYLE.md b/CODING_STYLE.md index bf8b304a5..6e2ad8562 100644 --- a/CODING_STYLE.md +++ b/CODING_STYLE.md @@ -733,11 +733,11 @@ __do_closedir __attribute__((__cleanup__(__auto_closedir__))) ``` For example: ```c -void turn_into_dependent_mounts(void) +void remount_all_slave(void) { __do_free char *line = NULL; __do_fclose FILE *f = NULL; - __do_close int memfd = -EBADF, mntinfo_fd = -EBADF; + __do_close_prot_errno int memfd = -EBADF, mntinfo_fd = -EBADF; int ret; ssize_t copied; size_t len = 0; @@ -780,7 +780,7 @@ again: return; } - f = fdopen(memfd, "re"); + f = fdopen(memfd, "r"); if (!f) { SYSERROR("Failed to open copy of \"/proc/self/mountinfo\" to mark all shared. Continuing"); return; @@ -810,11 +810,12 @@ again: null_endofword(target); ret = mount(NULL, target, NULL, MS_SLAVE, NULL); if (ret < 0) { - SYSERROR("Failed to recursively turn old root mount tree into dependent mount. 
Continuing..."); + SYSERROR("Failed to make \"%s\" MS_SLAVE", target); + ERROR("Continuing..."); continue; } - TRACE("Recursively turned old root mount tree into dependent mount"); + TRACE("Remounted \"%s\" as MS_SLAVE", target); } - TRACE("Turned all mount table entries into dependent mount"); + TRACE("Remounted all mount table entries as MS_SLAVE"); } ``` diff --git a/config/apparmor/abstractions/start-container.in b/config/apparmor/abstractions/start-container.in index 9998f1121..f2b48235d 100644 --- a/config/apparmor/abstractions/start-container.in +++ b/config/apparmor/abstractions/start-container.in @@ -21,8 +21,6 @@ # allow pre-mount hooks to stage mounts under /var/lib/lxc// mount -> /var/lib/lxc/{**,}, - mount /dev/.lxc-boot-id -> /proc/sys/kernel/random/boot_id, - # required for some pre-mount hooks mount fstype=overlayfs, mount fstype=aufs, diff --git a/config/init/common/lxc-net.in b/config/init/common/lxc-net.in index a7dfa6f19..df9f1181d 100644 --- a/config/init/common/lxc-net.in +++ b/config/init/common/lxc-net.in @@ -46,7 +46,7 @@ _ifdown() { _ifup() { MASK=`_netmask2cidr ${LXC_NETMASK}` CIDR_ADDR="${LXC_ADDR}/${MASK}" - ip addr add ${CIDR_ADDR} broadcast + dev ${LXC_BRIDGE} + ip addr add ${CIDR_ADDR} dev ${LXC_BRIDGE} ip link set dev ${LXC_BRIDGE} address $LXC_BRIDGE_MAC ip link set dev ${LXC_BRIDGE} up } diff --git a/config/templates/common.conf.in b/config/templates/common.conf.in index 286c5e4a3..c4b3bdcce 100644 --- a/config/templates/common.conf.in +++ b/config/templates/common.conf.in @@ -15,8 +15,6 @@ lxc.cap.drop = mac_admin mac_override sys_time sys_module sys_rawio # Ensure hostname is changed on clone lxc.hook.clone = @LXCHOOKDIR@/clonehostname -# Default legacy cgroup configuration -# # CGroup whitelist lxc.cgroup.devices.deny = a ## Allow any mknod (but not reading/writing the node) @@ -44,35 +42,6 @@ lxc.cgroup.devices.allow = c 136:* rwm ### fuse lxc.cgroup.devices.allow = c 10:229 rwm -# Default unified cgroup configuration -# -# 
CGroup whitelist -lxc.cgroup2.devices.deny = a -## Allow any mknod (but not reading/writing the node) -lxc.cgroup2.devices.allow = c *:* m -lxc.cgroup2.devices.allow = b *:* m -## Allow specific devices -### /dev/null -lxc.cgroup2.devices.allow = c 1:3 rwm -### /dev/zero -lxc.cgroup2.devices.allow = c 1:5 rwm -### /dev/full -lxc.cgroup2.devices.allow = c 1:7 rwm -### /dev/tty -lxc.cgroup2.devices.allow = c 5:0 rwm -### /dev/console -lxc.cgroup2.devices.allow = c 5:1 rwm -### /dev/ptmx -lxc.cgroup2.devices.allow = c 5:2 rwm -### /dev/random -lxc.cgroup2.devices.allow = c 1:8 rwm -### /dev/urandom -lxc.cgroup2.devices.allow = c 1:9 rwm -### /dev/pts/* -lxc.cgroup2.devices.allow = c 136:* rwm -### fuse -lxc.cgroup2.devices.allow = c 10:229 rwm - # Setup the default mounts lxc.mount.auto = cgroup:mixed proc:mixed sys:mixed lxc.mount.entry = /sys/fs/fuse/connections sys/fs/fuse/connections none bind,optional 0 0 diff --git a/config/templates/userns.conf.in b/config/templates/userns.conf.in index 69d992680..19013da5b 100644 --- a/config/templates/userns.conf.in +++ b/config/templates/userns.conf.in @@ -1,15 +1,7 @@ # CAP_SYS_ADMIN in init-user-ns is required for cgroup.devices -# -# Default legacy cgroup configuration -# lxc.cgroup.devices.deny = lxc.cgroup.devices.allow = -# Default unified cgroup configuration -# -lxc.cgroup2.devices.deny = -lxc.cgroup2.devices.allow = - # Start with a full set of capabilities in user namespaces. 
lxc.cap.drop = lxc.cap.keep = diff --git a/config/yum/lxc-patch.py b/config/yum/lxc-patch.py index fd48298d6..d639e8425 100644 --- a/config/yum/lxc-patch.py +++ b/config/yum/lxc-patch.py @@ -24,6 +24,7 @@ import os from fnmatch import fnmatch from yum.plugins import TYPE_INTERACTIVE +from yum.plugins import PluginYumExit requires_api_version = '2.0' plugin_type = (TYPE_INTERACTIVE,) diff --git a/configure.ac b/configure.ac index 059d57d38..9eb6dcb2b 100644 --- a/configure.ac +++ b/configure.ac @@ -43,6 +43,7 @@ AM_INIT_AUTOMAKE([-Wall -Werror -Wno-portability subdir-objects]) AC_CANONICAL_HOST AM_PROG_CC_C_O AC_USE_SYSTEM_EXTENSIONS +CFLAGS=`echo "${CFLAGS#\-g}"` # Test if we have a new enough compiler. AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ @@ -119,6 +120,9 @@ AM_CONDITIONAL([DISTRO_UBUNTU], [test "x$with_distro" = "xubuntu"]) AC_CONFIG_LINKS([config/etc/default.conf:config/etc/${distroconf}]) +# Check yajl +PKG_CHECK_MODULES([YAJL], [yajl >= 2],[],[AC_MSG_ERROR([You must install yajl >= 2])]) + # Check for init system type AC_MSG_CHECKING([for init system type]) AC_ARG_WITH([init-script], @@ -187,6 +191,11 @@ AC_ARG_ENABLE([werror], [AS_HELP_STRING([--disable-werror], [do not treat warnings as errors])], [enable_werror=$enableval], [enable_werror=yes]) +AC_ARG_ENABLE([debug], + [AC_HELP_STRING([--enable-debug], + [set -g into cflags [default=no]])], + [], [enable_debug=no]) + # Allow disabling rpath AC_ARG_ENABLE([rpath], [AS_HELP_STRING([--enable-rpath], [set rpath in executables [default=no]])], @@ -487,7 +496,7 @@ AC_ARG_WITH([rootfs-path], # cgroup pattern specification AC_ARG_WITH([cgroup-pattern], [AS_HELP_STRING([--with-cgroup-pattern=pattern], [pattern for container cgroups])], - [with_cgroup_pattern=$withval], [with_cgroup_pattern=['']]) + [with_cgroup_pattern=$withval], [with_cgroup_pattern=['lxc/%n']]) # The path for the apparmor_parser's cache for generated apparmor profiles AC_ARG_WITH([apparmor-cache-dir], @@ -622,10 +631,7 @@ 
AC_CHECK_HEADER([ifaddrs.h], AC_HEADER_MAJOR # Check for some syscalls functions -AC_CHECK_FUNCS([setns pivot_root sethostname unshare rand_r confstr faccessat gettid memfd_create move_mount open_tree execveat clone3]) -AC_CHECK_TYPES([struct clone_args], [], [], [[#include ]]) -AC_CHECK_MEMBERS([struct clone_args.set_tid],[],[],[[#include ]]) -AC_CHECK_MEMBERS([struct clone_args.cgroup],[],[],[[#include ]]) +AC_CHECK_FUNCS([setns pivot_root sethostname unshare rand_r confstr faccessat gettid memfd_create]) # Check for strerror_r() support. Defines: # - HAVE_STRERROR_R if available @@ -732,7 +738,6 @@ AX_CHECK_COMPILE_FLAG([-fno-strict-aliasing], [CFLAGS="$CFLAGS -fno-strict-alias AX_CHECK_COMPILE_FLAG([-fstack-clash-protection], [CFLAGS="$CFLAGS -fstack-clash-protection"],,[-Werror]) AX_CHECK_LINK_FLAG([-fstack-protector-strong], [CFLAGS="$CFLAGS -fstack-protector-strong"],,[-Werror]) AX_CHECK_LINK_FLAG([--param=ssp-buffer-size=4], [CFLAGS="$CFLAGS --param=ssp-buffer-size=4"],,[-Werror]) -AX_CHECK_COMPILE_FLAG([-g], [CFLAGS="$CFLAGS -g"],,[-Werror]) AX_CHECK_COMPILE_FLAG([--mcet -fcf-protection], [CFLAGS="$CFLAGS --mcet -fcf-protection"],,[-Werror]) AX_CHECK_COMPILE_FLAG([-Werror=implicit-function-declaration], [CFLAGS="$CFLAGS -Werror=implicit-function-declaration"],,[-Werror]) AX_CHECK_COMPILE_FLAG([-Wlogical-op], [CFLAGS="$CFLAGS -Wlogical-op"],,[-Werror]) @@ -756,40 +761,24 @@ AX_CHECK_COMPILE_FLAG([-Wnested-externs], [CFLAGS="$CFLAGS -Wnested-externs"],,[ AX_CHECK_COMPILE_FLAG([-fasynchronous-unwind-tables], [CFLAGS="$CFLAGS -fasynchronous-unwind-tables"],,[-Werror]) AX_CHECK_COMPILE_FLAG([-pipe], [CFLAGS="$CFLAGS -pipe"],,[-Werror]) AX_CHECK_COMPILE_FLAG([-fexceptions], [CFLAGS="$CFLAGS -fexceptions"],,[-Werror]) -AX_CHECK_COMPILE_FLAG([-Warray-bounds], [CFLAGS="$CFLAGS -Warray-bounds"],,[-Werror]) -AX_CHECK_COMPILE_FLAG([-Wrestrict], [CFLAGS="$CFLAGS -Wrestrict"],,[-Werror]) -AX_CHECK_COMPILE_FLAG([-Wreturn-local-addr], [CFLAGS="$CFLAGS 
-Wreturn-local-addr"],,[-Werror]) -AX_CHECK_COMPILE_FLAG([-Wstringop-overflow], [CFLAGS="$CFLAGS -Wstringop-overflow"],,[-Werror]) AX_CHECK_LINK_FLAG([-z relro], [LDFLAGS="$LDFLAGS -z relro"],,[]) AX_CHECK_LINK_FLAG([-z now], [LDFLAGS="$LDFLAGS -z now"],,[]) +AX_CHECK_LINK_FLAG([-z noexecstack], [LDFLAGS="$LDFLAGS -z noexecstack"],,[]) -CFLAGS="$CFLAGS -Wvla -std=gnu11 -fms-extensions" +CFLAGS="$CFLAGS -Wvla -std=gnu11 -D_FORTIFY_SOURCE=2 -Wall -fPIC -fPIE -pie" if test "x$enable_werror" = "xyes"; then CFLAGS="$CFLAGS -Werror" fi +if test "x$enable_debug" = "xyes"; then + CFLAGS="$CFLAGS -g" +fi + AC_ARG_ENABLE([thread-safety], [AS_HELP_STRING([--enable-thread-safety], [enforce thread-safety otherwise fail the build [default=yes]])], [enable_thread_safety=$enableval], [enable_thread_safety=yes]) AM_CONDITIONAL([ENFORCE_THREAD_SAFETY], [test "x$enable_thread_safety" = "xyes"]) -if test "x$enable_thread_safety" = "xyes"; then - AC_DEFINE([ENFORCE_THREAD_SAFETY], 1, [enforce thread-safety otherwise fail the build]) - AC_MSG_RESULT([yes]) -else - AC_MSG_RESULT([no]) -fi - -AC_ARG_ENABLE([coverity-build], - [AS_HELP_STRING([--enable-coverity-build], [build for use with Coverity [default=no]])], - [enable_coverity_build=$enableval], [enable_coverity_build=no]) -AM_CONDITIONAL([ENABLE_COVERITY_BUILD], [test "x$enable_coverity_build" = "xyes"]) -if test "x$enable_coverity_build" = "xyes"; then - AC_DEFINE([ENABLE_COVERITY_BUILD], 1, [build for use with Coverity]) - AC_MSG_RESULT([yes]) -else - AC_MSG_RESULT([no]) -fi AC_ARG_ENABLE([dlog], [AS_HELP_STRING([--enable-dlog], [enable dlog support [default=no]])], @@ -815,6 +804,17 @@ else AC_MSG_RESULT([no]) fi +AC_MSG_CHECKING([Whether adapt to iSulad]) +AC_ARG_ENABLE([isulad], + [AC_HELP_STRING([--enable-isulad], [enable adapt to iSulad [default=yes]])], + [adapt_isulad=$enableval], [adapt_isulad=yes]) +AM_CONDITIONAL([HAVE_ISULAD], [test "x$adapt_isulad" = "xyes"]) +if test "x$adapt_isulad" = "xyes"; then + 
AC_DEFINE([HAVE_ISULAD], 1, [adapt to iSulad]) + AC_MSG_RESULT([yes]) +else + AC_MSG_RESULT([no]) +fi # Files requiring some variable expansion AC_CONFIG_FILES([ Makefile @@ -1061,10 +1061,9 @@ Documentation: - user documentation: $enable_doc Debugging: + - tests: $enable_tests - ASAN: $enable_asan - - Coverity: $enable_coverity_build - mutex debugging: $enable_mutex_debugging - - tests: $enable_tests Paths: - Logs in configpath: $enable_configpath_log diff --git a/doc/ja/lxc.container.conf.sgml.in b/doc/ja/lxc.container.conf.sgml.in index 38b623243..fc692b409 100644 --- a/doc/ja/lxc.container.conf.sgml.in +++ b/doc/ja/lxc.container.conf.sgml.in @@ -713,25 +713,25 @@ by KATOH Yasufumi modes are , and . It defaults to mode. In mode TX processing up to L3 happens on the stack instance - attached to the dependent device and packets are switched to the stack instance of the - parent device for the L2 processing and routing from that instance will be - used before packets are queued on the outbound device. In this mode the dependent devices + attached to the slave device and packets are switched to the stack instance of the + master device for the L2 processing and routing from that instance will be + used before packets are queued on the outbound device. In this mode the slaves will not receive nor can send multicast / broadcast traffic. In mode TX processing is very similar to the L3 mode except that iptables (conn-tracking) works in this mode and hence it is L3-symmetric (L3s). This will have slightly less performance but that shouldn't matter since you are choosing this mode over plain-L3 mode to make conn-tracking work. In mode TX processing happens on the stack instance attached to - the dependent device and packets are switched and queued to the parent device to send - out. In this mode the dependent devices will RX/TX multicast and broadcast (if applicable) as well. + the slave device and packets are switched and queued to the master device to send + out. 
In this mode the slaves will RX/TX multicast and broadcast (if applicable) as well. specifies the isolation mode. The accepted isolation values are , and . It defaults to . - In isolation mode dependent devices can cross-talk among themselves - apart from talking through the parent device. + In isolation mode slaves can cross-talk among themselves + apart from talking through the master device. In isolation mode the port is set in private mode. - i.e. port won't allow cross communication between dependent devices. + i.e. port won't allow cross communication between slaves. In isolation mode the port is set in VEPA mode. i.e. port will offload switching functionality to the external entity as described in 802.1Qbg. @@ -1548,7 +1548,7 @@ by KATOH Yasufumi fstab フォーマットの一行と同じフォーマットのマウントポイントの指定をします。 - 加えて、LXC では rshared や rprivate といったマウント・プロパゲーションオプションと、独自の 3 つのマウントオプションが使えます。 + 加えて、LXC では rslave や rprivate といったマウント・プロパゲーションオプションと、独自の 3 つのマウントオプションが使えます。 は、マウントが失敗しても失敗を返さずに無視します。 は、マウントポイントをマウントする際にディレクトリもしくはファイルを作成します。 を指定すると、マウントされたコンテナルートからの相対パスとして取得されます。 diff --git a/doc/lxc.container.conf.sgml.in b/doc/lxc.container.conf.sgml.in index 3ed71c214..ae04e3af3 100644 --- a/doc/lxc.container.conf.sgml.in +++ b/doc/lxc.container.conf.sgml.in @@ -530,25 +530,25 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA modes are , and . It defaults to mode. In mode TX processing up to L3 happens on the stack instance - attached to the dependent device and packets are switched to the stack instance of the - parent device for the L2 processing and routing from that instance will be - used before packets are queued on the outbound device. In this mode the dependent devices + attached to the slave device and packets are switched to the stack instance of the + master device for the L2 processing and routing from that instance will be + used before packets are queued on the outbound device. 
In this mode the slaves will not receive nor can send multicast / broadcast traffic. In mode TX processing is very similar to the L3 mode except that iptables (conn-tracking) works in this mode and hence it is L3-symmetric (L3s). This will have slightly less performance but that shouldn't matter since you are choosing this mode over plain-L3 mode to make conn-tracking work. In mode TX processing happens on the stack instance attached to - the dependent device and packets are switched and queued to the parent device to send devices - out. In this mode the dependent devices will RX/TX multicast and broadcast (if applicable) as well. + the slave device and packets are switched and queued to the master device to send + out. In this mode the slaves will RX/TX multicast and broadcast (if applicable) as well. specifies the isolation mode. The accepted isolation values are , and . It defaults to . - In isolation mode dependent devices can cross-talk among themselves - apart from talking through the parent device. + In isolation mode slaves can cross-talk among themselves + apart from talking through the master device. In isolation mode the port is set in private mode. - i.e. port won't allow cross communication between dependent devices. + i.e. port won't allow cross communication between slaves. In isolation mode the port is set in VEPA mode. i.e. port will offload switching functionality to the external entity as described in 802.1Qbg. @@ -1164,7 +1164,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Specify a mount point corresponding to a line in the fstab format. - Moreover lxc supports mount propagation, such as rshared or + Moreover lxc supports mount propagation, such as rslave or rprivate, and adds three additional mount options. don't fail if mount does not work. 
or diff --git a/hooks/Makefile.am b/hooks/Makefile.am index 5ae73d72c..ddfd4bc32 100644 --- a/hooks/Makefile.am +++ b/hooks/Makefile.am @@ -10,6 +10,8 @@ hooks_SCRIPTS = \ squid-deb-proxy-client \ nvidia + +if !HAVE_ISULAD binhooks_PROGRAMS = \ unmount-namespace @@ -20,5 +22,6 @@ if IS_BIONIC unmount_namespace_SOURCES += \ ../src/include/lxcmntent.c ../src/include/lxcmntent.h endif +endif EXTRA_DIST=$(hooks_SCRIPTS) diff --git a/src/include/fexecve.c b/src/include/fexecve.c index 40d2b5b46..123f27309 100644 --- a/src/include/fexecve.c +++ b/src/include/fexecve.c @@ -29,7 +29,7 @@ #include #include "config.h" #include "macro.h" -#include "process_utils.h" +#include "raw_syscalls.h" int fexecve(int fd, char *const argv[], char *const envp[]) { @@ -41,9 +41,11 @@ int fexecve(int fd, char *const argv[], char *const envp[]) return -1; } - execveat(fd, "", argv, envp, AT_EMPTY_PATH); +#ifdef __NR_execveat + lxc_raw_execveat(fd, "", argv, envp, AT_EMPTY_PATH); if (errno != ENOSYS) return -1; +#endif ret = snprintf(procfd, sizeof(procfd), "/proc/self/fd/%d", fd); if (ret < 0 || (size_t)ret >= sizeof(procfd)) { diff --git a/src/include/openpty.c b/src/include/openpty.c index 7804d4c98..01579c517 100644 --- a/src/include/openpty.c +++ b/src/include/openpty.c @@ -34,43 +34,43 @@ #define _PATH_DEVPTMX "/dev/ptmx" -int openpty (int *aptmx, int *apts, char *name, struct termios *termp, +int openpty (int *amaster, int *aslave, char *name, struct termios *termp, struct winsize *winp) { char buf[PATH_MAX]; - int ptmx, pts; + int master, slave; - ptmx = open(_PATH_DEVPTMX, O_RDWR); - if (ptmx == -1) + master = open(_PATH_DEVPTMX, O_RDWR); + if (master == -1) return -1; - if (grantpt(ptmx)) + if (grantpt(master)) goto fail; - if (unlockpt(ptmx)) + if (unlockpt(master)) goto fail; - if (ptsname_r(ptmx, buf, sizeof buf)) + if (ptsname_r(master, buf, sizeof buf)) goto fail; - pts = open(buf, O_RDWR | O_NOCTTY); - if (pts == -1) + slave = open(buf, O_RDWR | O_NOCTTY); + if (slave == -1) 
goto fail; /* XXX Should we ignore errors here? */ if (termp) - tcsetattr(pts, TCSAFLUSH, termp); + tcsetattr(slave, TCSAFLUSH, termp); if (winp) - ioctl(pts, TIOCSWINSZ, winp); + ioctl(slave, TIOCSWINSZ, winp); - *aptmx = ptmx; - *apts = pts; + *amaster = master; + *aslave = slave; if (name != NULL) strcpy(name, buf); return 0; fail: - close(ptmx); + close(master); return -1; } diff --git a/src/include/openpty.h b/src/include/openpty.h index cb452e52a..6e7bf8d2d 100644 --- a/src/include/openpty.h +++ b/src/include/openpty.h @@ -27,12 +27,10 @@ #include #include -/* - * Create pseudo tty ptmx pts pair with @__name and set terminal - * attributes according to @__termp and @__winp and return handles for both - * ends in @__aptmx and @__apts. - */ -extern int openpty (int *__aptmx, int *__apts, char *__name, +/* Create pseudo tty master slave pair with NAME and set terminal + attributes according to TERMP and WINP and return handles for both + ends in AMASTER and ASLAVE. */ +extern int openpty (int *__amaster, int *__aslave, char *__name, const struct termios *__termp, const struct winsize *__winp); diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am index d1e23647e..0e1ba8da9 100644 --- a/src/lxc/Makefile.am +++ b/src/lxc/Makefile.am @@ -27,7 +27,7 @@ noinst_HEADERS = api_extensions.h \ memory_utils.h \ monitor.h \ namespace.h \ - process_utils.h \ + raw_syscalls.h \ rexec.h \ start.h \ state.h \ @@ -52,6 +52,16 @@ noinst_HEADERS = api_extensions.h \ utils.h \ uuid.h +if HAVE_ISULAD +noinst_HEADERS += isulad_utils.h path.h \ + json/json_common.h json/defs.h \ + json/oci_runtime_hooks.h \ + json/logger_json_file.h \ + json/oci_runtime_spec.h \ + json/read-file.h \ + exec_commands.h +endif + if IS_BIONIC noinst_HEADERS += ../include/fexecve.h \ ../include/lxcmntent.h \ @@ -128,13 +138,14 @@ liblxc_la_SOURCES = af_unix.c af_unix.h \ network.c network.h \ monitor.c monitor.h \ parse.c parse.h \ - process_utils.c process_utils.h \ + raw_syscalls.c raw_syscalls.h \ 
ringbuf.c ringbuf.h \ rtnl.c rtnl.h \ state.c state.h \ start.c start.h \ storage/btrfs.c storage/btrfs.h \ storage/dir.c storage/dir.h \ + storage/block.c storage/block.h \ storage/loop.c storage/loop.h \ storage/lvm.c storage/lvm.h \ storage/nbd.c storage/nbd.h \ @@ -154,6 +165,18 @@ liblxc_la_SOURCES = af_unix.c af_unix.h \ version.h \ $(LSM_SOURCES) +if HAVE_ISULAD +liblxc_la_SOURCES += isulad_utils.c isulad_utils.h \ + path.c path.h \ + json/json_common.c json/json_common.h \ + json/defs.h json/defs.c \ + json/oci_runtime_hooks.c json/oci_runtime_hooks.h \ + json/logger_json_file.c json/logger_json_file.h \ + json/oci_runtime_spec.c json/oci_runtime_spec.h \ + json/read-file.c json/read-file.h \ + exec_commands.c exec_commands.h +endif + if IS_BIONIC liblxc_la_SOURCES += ../include/fexecve.c ../include/fexecve.h \ ../include/lxcmntent.c ../include/lxcmntent.h \ @@ -212,6 +235,10 @@ AM_CFLAGS = -DLXCROOTFSMOUNT=\"$(LXCROOTFSMOUNT)\" \ -I $(top_srcdir)/src/lxc/storage \ -I $(top_srcdir)/src/lxc/cgroups +if HAVE_ISULAD +AM_CFLAGS += -I $(top_srcdir)/src/lxc/json +AM_CFLAGS += -DHAVE_ISULAD +endif if ENABLE_APPARMOR AM_CFLAGS += -DHAVE_APPARMOR endif @@ -249,6 +276,10 @@ liblxc_la_CFLAGS += -fsanitize=address \ -fno-omit-frame-pointer endif +if HAVE_ISULAD +liblxc_la_CFLAGS += -D_FORTIFY_SOURCE=2 -Wall +endif + if ENABLE_UBSAN liblxc_la_CFLAGS += -fsanitize=undefined endif @@ -258,6 +289,12 @@ liblxc_la_LDFLAGS = -pthread \ -Wl,-soname,liblxc.so.$(firstword $(subst ., ,@LXC_ABI@)) \ -version-info @LXC_ABI_MAJOR@ +if HAVE_ISULAD +liblxc_la_LDFLAGS += @YAJL_LIBS@ -Wl,-z,relro \ + -Wl,-z,now \ + -Wl,-z,noexecstack +endif + liblxc_la_LIBADD = $(CAP_LIBS) \ $(OPENSSL_LIBS) \ $(SELINUX_LIBS) \ @@ -384,7 +421,7 @@ init_lxc_SOURCES = cmd/lxc_init.c \ initutils.c initutils.h \ memory_utils.h \ parse.c parse.h \ - process_utils.c process_utils.h \ + raw_syscalls.c raw_syscalls.h \ syscall_numbers.h \ string_utils.c string_utils.h @@ -395,7 +432,7 @@ lxc_monitord_SOURCES = 
cmd/lxc_monitord.c \ log.c log.h \ mainloop.c mainloop.h \ monitor.c monitor.h \ - process_utils.c process_utils.h \ + raw_syscalls.c raw_syscalls.h \ syscall_numbers.h \ utils.c utils.h lxc_user_nic_SOURCES = cmd/lxc_user_nic.c \ @@ -404,7 +441,7 @@ lxc_user_nic_SOURCES = cmd/lxc_user_nic.c \ memory_utils.h \ network.c network.h \ parse.c parse.h \ - process_utils.c process_utils.h \ + raw_syscalls.c raw_syscalls.h \ syscall_numbers.h \ file_utils.c file_utils.h \ string_utils.c string_utils.h \ diff --git a/src/lxc/af_unix.c b/src/lxc/af_unix.c index 5cf54917f..9f268be60 100644 --- a/src/lxc/af_unix.c +++ b/src/lxc/af_unix.c @@ -18,7 +18,7 @@ #include "log.h" #include "macro.h" #include "memory_utils.h" -#include "process_utils.h" +#include "raw_syscalls.h" #include "utils.h" #ifndef HAVE_STRLCPY @@ -168,7 +168,7 @@ int lxc_unix_send_fds(int fd, int *sendfds, int num_sendfds, void *data, } static int lxc_abstract_unix_recv_fds_iov(int fd, int *recvfds, int num_recvfds, - struct iovec *iov, size_t iovlen) + struct iovec *iov, size_t iovlen, unsigned int timeout) { __do_free char *cmsgbuf = NULL; int ret; @@ -188,8 +188,24 @@ static int lxc_abstract_unix_recv_fds_iov(int fd, int *recvfds, int num_recvfds, msg.msg_iov = iov; msg.msg_iovlen = iovlen; +#ifdef HAVE_ISULAD + struct timeval out; + if (timeout > 0) { + memset(&out, 0, sizeof(out)); + out.tv_sec = timeout / 1000000; + out.tv_usec = timeout % 1000000; + ret = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, + (const void *)&out, sizeof(out)); + if (ret < 0) { + ERROR("Failed to set %u timeout on containter " + "state socket", timeout); + return ret; + } + } +#endif + do { - ret = recvmsg(fd, &msg, MSG_CMSG_CLOEXEC); + ret = recvmsg(fd, &msg, 0); } while (ret < 0 && errno == EINTR); if (ret < 0 || ret == 0) return ret; @@ -220,8 +236,21 @@ int lxc_abstract_unix_recv_fds(int fd, int *recvfds, int num_recvfds, .iov_base = data ? data : buf, .iov_len = data ? 
size : sizeof(buf), }; - return lxc_abstract_unix_recv_fds_iov(fd, recvfds, num_recvfds, &iov, 1); + return lxc_abstract_unix_recv_fds_iov(fd, recvfds, num_recvfds, &iov, 1, 0); +} + +#ifdef HAVE_ISULAD +int lxc_abstract_unix_recv_fds_timeout(int fd, int *recvfds, int num_recvfds, + void *data, size_t size, unsigned int timeout) +{ + char buf[1] = {0}; + struct iovec iov = { + .iov_base = data ? data : buf, + .iov_len = data ? size : sizeof(buf), + }; + return lxc_abstract_unix_recv_fds_iov(fd, recvfds, num_recvfds, &iov, 1, timeout); } +#endif int lxc_abstract_unix_send_credential(int fd, void *data, size_t size) { diff --git a/src/lxc/af_unix.h b/src/lxc/af_unix.h index 5a1482c35..6943a61ee 100644 --- a/src/lxc/af_unix.h +++ b/src/lxc/af_unix.h @@ -7,38 +7,28 @@ #include #include -#include "compiler.h" - /* does not enforce \0-termination */ extern int lxc_abstract_unix_open(const char *path, int type, int flags); extern void lxc_abstract_unix_close(int fd); /* does not enforce \0-termination */ extern int lxc_abstract_unix_connect(const char *path); - extern int lxc_abstract_unix_send_fds(int fd, int *sendfds, int num_sendfds, - void *data, size_t size) -__access_r(2, 3) __access_r(4, 5); - -extern int lxc_abstract_unix_send_fds_iov(int fd, int *sendfds, int num_sendfds, - struct iovec *iov, size_t iovlen) -__access_r(2, 3); - -extern int lxc_abstract_unix_recv_fds(int fd, int *recvfds, int num_recvfds, - void *data, size_t size) -__access_r(2, 3) __access_r(4, 5); - + void *data, size_t size); +extern int lxc_abstract_unix_send_fds_iov(int fd, int *sendfds, + int num_sendfds, struct iovec *iov, + size_t iovlen); extern int lxc_unix_send_fds(int fd, int *sendfds, int num_sendfds, void *data, size_t size); - -extern int lxc_abstract_unix_send_credential(int fd, void *data, size_t size) -__access_r(2, 3); - -extern int lxc_abstract_unix_rcv_credential(int fd, void *data, size_t size) -__access_w(2, 3); - +extern int lxc_abstract_unix_recv_fds(int fd, int *recvfds, 
int num_recvfds, + void *data, size_t size); +extern int lxc_abstract_unix_send_credential(int fd, void *data, size_t size); +extern int lxc_abstract_unix_rcv_credential(int fd, void *data, size_t size); extern int lxc_unix_sockaddr(struct sockaddr_un *ret, const char *path); extern int lxc_unix_connect(struct sockaddr_un *addr); extern int lxc_unix_connect_type(struct sockaddr_un *addr, int type); extern int lxc_socket_set_timeout(int fd, int rcv_timeout, int snd_timeout); - +#ifdef HAVE_ISULAD +int lxc_abstract_unix_recv_fds_timeout(int fd, int *recvfds, int num_recvfds, + void *data, size_t size, unsigned int timeout); +#endif #endif /* __LXC_AF_UNIX_H */ diff --git a/src/lxc/api_extensions.h b/src/lxc/api_extensions.h index 3afdc35b9..9ff071edf 100644 --- a/src/lxc/api_extensions.h +++ b/src/lxc/api_extensions.h @@ -38,7 +38,6 @@ static char *api_extensions[] = { "cgroup2_devices", #endif "cgroup2", - "pidfd", }; static size_t nr_api_extensions = sizeof(api_extensions) / sizeof(*api_extensions); diff --git a/src/lxc/attach.c b/src/lxc/attach.c index 38e16f2d1..068cc5f8e 100644 --- a/src/lxc/attach.c +++ b/src/lxc/attach.c @@ -40,7 +40,7 @@ #include "mainloop.h" #include "memory_utils.h" #include "namespace.h" -#include "process_utils.h" +#include "raw_syscalls.h" #include "syscall_wrappers.h" #include "terminal.h" #include "utils.h" @@ -49,6 +49,25 @@ #include #endif +#ifdef HAVE_ISULAD +#include "exec_commands.h" + +typedef enum { + ATTACH_INIT, + ATTACH_TIMEOUT, + ATTACH_MAX, +} attach_timeout_t; + +static volatile attach_timeout_t g_attach_timeout_state = ATTACH_INIT; + +struct attach_timeout_conf { + int64_t timeout; + unsigned long long start_time; + pid_t pid; +}; + +#endif + lxc_log_define(attach, lxc); /* Define default options if no options are supplied by the user. 
*/ @@ -194,8 +213,12 @@ int lxc_attach_remount_sys_proc(void) if (ret < 0) return log_error_errno(-1, errno, "Failed to unshare mount namespace"); - if (detect_shared_rootfs() && mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) - SYSERROR("Failed to recursively turn root mount tree into dependent mount. Continuing..."); + if (detect_shared_rootfs()) { + if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) { + SYSERROR("Failed to make / rslave"); + ERROR("Continuing..."); + } + } /* Assume /proc is always mounted, so remount it. */ ret = umount2("/proc", MNT_DETACH); @@ -625,23 +648,69 @@ static signed long get_personality(const char *name, const char *lxcpath) struct attach_clone_payload { int ipc_socket; - int terminal_pts_fd; + int terminal_slave_fd; lxc_attach_options_t *options; struct lxc_proc_context_info *init_ctx; lxc_attach_exec_t exec_function; void *exec_payload; +#ifdef HAVE_ISULAD + struct lxc_terminal *terminal; +#endif }; static void lxc_put_attach_clone_payload(struct attach_clone_payload *p) { close_prot_errno_disarm(p->ipc_socket); - close_prot_errno_disarm(p->terminal_pts_fd); + close_prot_errno_disarm(p->terminal_slave_fd); if (p->init_ctx) { lxc_proc_put_context_info(p->init_ctx); p->init_ctx = NULL; } } +#ifdef HAVE_ISULAD +static int isulad_set_attach_pipes(struct lxc_terminal *terminal) +{ + int ret = 0; + if (terminal->pipes[0][1] >= 0) { + close(terminal->pipes[0][1]); + terminal->pipes[0][1] = -1; + } + + if (terminal->pipes[0][0] >= 0) { + ret = dup2(terminal->pipes[0][0], STDIN_FILENO); + if (ret < 0) + goto out; + } + + if (terminal->pipes[1][0] >= 0) { + close(terminal->pipes[1][0]); + terminal->pipes[1][0] = -1; + } + + if (terminal->pipes[1][1] >= 0) { + ret = dup2(terminal->pipes[1][1], STDOUT_FILENO); + if (ret < 0) + goto out; + } + if (terminal->pipes[2][0] >= 0) { + close(terminal->pipes[2][0]); + terminal->pipes[2][0] = -1; + } + + if (terminal->pipes[2][1] >= 0) { + ret = dup2(terminal->pipes[2][1], STDERR_FILENO); + if (ret 
< 0) + goto out; + } + + setsid(); +out: + return ret; +} + +#endif + static int attach_child_main(struct attach_clone_payload *payload) { int lsm_fd, ret; @@ -654,6 +723,31 @@ static int attach_child_main(struct attach_clone_payload *payload) bool needs_lsm = (options->namespaces & CLONE_NEWNS) && (options->attach_flags & LXC_ATTACH_LSM) && init_ctx->lsm_label; +#ifdef HAVE_ISULAD + int msg_fd = -1; + sigset_t mask; + + /*isulad: record errpipe fd*/ + msg_fd = init_ctx->container->lxc_conf->errpipe[1]; + init_ctx->container->lxc_conf->errpipe[1] = -1; + /*isulad: set system umask */ + umask(init_ctx->container->lxc_conf->umask); + + /*isulad: restore default signal handlers and unblock all signals*/ + for (int i = 1; i < NSIG; i++) + signal(i, SIG_DFL); + + ret = sigfillset(&mask); + if (ret < 0) { + SYSERROR("Failed to fill signal mask"); + goto on_error;; + } + ret = sigprocmask(SIG_UNBLOCK, &mask, NULL); + if (ret < 0) { + SYSERROR("Failed to set signal mask"); + goto on_error; + } +#endif /* A description of the purpose of this functionality is provided in the * lxc-attach(1) manual page. We have to remount here and not in the @@ -695,6 +789,24 @@ static int attach_child_main(struct attach_clone_payload *payload) TRACE("Dropped capabilities"); } +#ifdef HAVE_ISULAD + /* isulad: set workdir */ + if (init_ctx->container->lxc_conf->init_cwd) { + char *init_cwd; + init_cwd = init_ctx->container->lxc_conf->init_cwd; + /* try to create workdir if not exist */ + struct stat st; + if (stat(init_cwd, &st) < 0 && mkdir_p(init_cwd, 0750) < 0) { + SYSERROR("Try to create directory \"%s\" as workdir failed when attach", init_cwd); + goto on_error; + } + if (chdir(init_cwd)) { + SYSERROR("Could not change directory to \"%s\" when attach", init_cwd); + goto on_error; + } + } +#endif + /* Always set the environment (specify (LXC_ATTACH_KEEP_ENV, NULL, NULL) * if you want this to be a no-op). 
*/ @@ -736,8 +848,10 @@ static int attach_child_main(struct attach_clone_payload *payload) goto on_error; } +#ifndef HAVE_ISULAD if (!lxc_setgroups(0, NULL) && errno != EPERM) goto on_error; +#endif if (options->namespaces & CLONE_NEWUSER) { /* Check whether nsuid 0 has a mapping. */ @@ -770,6 +884,13 @@ static int attach_child_main(struct attach_clone_payload *payload) else new_gid = ns_root_gid; +#ifdef HAVE_ISULAD + // isulad: set env home in container + if (lxc_setup_env_home(new_uid) < 0) { + goto on_error; + } +#endif + if ((init_ctx->container && init_ctx->container->lxc_conf && init_ctx->container->lxc_conf->no_new_privs) || (options->attach_flags & LXC_ATTACH_NO_NEW_PRIVS)) { @@ -810,10 +931,12 @@ static int attach_child_main(struct attach_clone_payload *payload) goto on_error; } +#ifndef HAVE_ISULAD close(payload->ipc_socket); payload->ipc_socket = -EBADF; lxc_proc_put_context_info(init_ctx); payload->init_ctx = NULL; +#endif /* The following is done after the communication socket is shut down. 
* That way, all errors that might (though unlikely) occur up until this @@ -856,13 +979,33 @@ static int attach_child_main(struct attach_clone_payload *payload) } if (options->attach_flags & LXC_ATTACH_TERMINAL) { - ret = lxc_terminal_prepare_login(payload->terminal_pts_fd); + +#ifdef HAVE_ISULAD + /* isulad: dup2 pipe[0][0] to container stdin, pipe[1][1] to container stdout, pipe[2][1] to container stderr */ + if (payload->terminal->disable_pty) { + ret = isulad_set_attach_pipes(payload->terminal); + if (ret < 0) { + SYSERROR("Failed to prepare terminal file pipes"); + goto on_error; + } + } + + if(!payload->terminal->disable_pty && payload->terminal_slave_fd >= 0) { + ret = lxc_terminal_prepare_login(payload->terminal_slave_fd); + if (ret < 0) { + SYSERROR("Failed to prepare terminal file descriptor %d", payload->terminal_slave_fd); + goto on_error; + } + } +#else + ret = lxc_terminal_prepare_login(payload->terminal_slave_fd); if (ret < 0) { - SYSERROR("Failed to prepare terminal file descriptor %d", payload->terminal_pts_fd); + SYSERROR("Failed to prepare terminal file descriptor %d", payload->terminal_slave_fd); goto on_error; } - TRACE("Prepared terminal file descriptor %d", payload->terminal_pts_fd); + TRACE("Prepared terminal file descriptor %d", payload->terminal_slave_fd); +#endif } /* Avoid unnecessary syscalls. 
*/ @@ -872,6 +1015,17 @@ static int attach_child_main(struct attach_clone_payload *payload) if (new_gid == ns_root_gid) new_gid = LXC_INVALID_GID; +#ifdef HAVE_ISULAD + if (prctl(PR_SET_KEEPCAPS, 1) < 0) { + SYSERROR("Failed to keep permitted capabilities"); + goto on_error; + } + + if (!lxc_setgroups(init_ctx->container->lxc_conf->init_groups_len, + init_ctx->container->lxc_conf->init_groups)) + goto on_error; +#endif + /* Make sure that the processes STDIO is correctly owned by the user that we are switching to */ ret = fix_stdio_permissions(new_uid); if (ret) @@ -880,8 +1034,27 @@ static int attach_child_main(struct attach_clone_payload *payload) if (!lxc_switch_uid_gid(new_uid, new_gid)) goto on_error; +#ifdef HAVE_ISULAD + if (prctl(PR_SET_KEEPCAPS, 0) < 0) { + SYSERROR("Failed to clear permitted capabilities"); + goto on_error; + } + + if (lxc_drop_caps(init_ctx->container->lxc_conf) != 0) { + ERROR("Failed to drop caps."); + goto on_error; + } + + close(payload->ipc_socket); + payload->ipc_socket = -EBADF; + lxc_proc_put_context_info(init_ctx); + payload->init_ctx = NULL; + _exit(payload->exec_function(payload->exec_payload, msg_fd)); +#else /* We're done, so we can now do whatever the user intended us to do. 
*/ _exit(payload->exec_function(payload->exec_payload)); +#endif + on_error: lxc_put_attach_clone_payload(payload); @@ -889,12 +1062,31 @@ on_error: } static int lxc_attach_terminal(struct lxc_conf *conf, - struct lxc_terminal *terminal) + struct lxc_terminal *terminal, lxc_attach_options_t *options) { int ret; lxc_terminal_init(terminal); +#ifdef HAVE_ISULAD + /* isulad: if we pass fifo in option, use them as init fifos */ + if (options->init_fifo[0]) { + free(terminal->init_fifo[0]); + terminal->init_fifo[0] = safe_strdup(options->init_fifo[0]); + } + if (options->init_fifo[1]) { + free(terminal->init_fifo[1]); + terminal->init_fifo[1] = safe_strdup(options->init_fifo[1]); + } + if (options->init_fifo[2]) { + free(terminal->init_fifo[2]); + terminal->init_fifo[2] = safe_strdup(options->init_fifo[2]); + } + + terminal->disable_pty = options->disable_pty; + terminal->open_stdin = options->open_stdin; +#endif + ret = lxc_terminal_create(terminal); if (ret < 0) return log_error(-1, "Failed to create terminal"); @@ -932,14 +1124,14 @@ static int lxc_attach_terminal_mainloop_init(struct lxc_terminal *terminal, return 0; } -static inline void lxc_attach_terminal_close_ptmx(struct lxc_terminal *terminal) +static inline void lxc_attach_terminal_close_master(struct lxc_terminal *terminal) { - close_prot_errno_disarm(terminal->ptmx); + close_prot_errno_disarm(terminal->master); } -static inline void lxc_attach_terminal_close_pts(struct lxc_terminal *terminal) +static inline void lxc_attach_terminal_close_slave(struct lxc_terminal *terminal) { - close_prot_errno_disarm(terminal->pts); + close_prot_errno_disarm(terminal->slave); } static inline void lxc_attach_terminal_close_peer(struct lxc_terminal *terminal) @@ -952,9 +1144,125 @@ static inline void lxc_attach_terminal_close_log(struct lxc_terminal *terminal) close_prot_errno_disarm(terminal->log_fd); } +#ifdef HAVE_ISULAD +/* isulad: attach timeout thread function */ +static void* wait_attach_timeout(void *arg) +{ + struct 
attach_timeout_conf *conf = (struct attach_timeout_conf *)arg; + + if (!conf || conf->timeout < 1) + goto out; + sleep(conf->timeout); + if (lxc_process_alive(conf->pid, conf->start_time)) { + g_attach_timeout_state = ATTACH_TIMEOUT; + if (kill(conf->pid, SIGKILL) < 0) { + ERROR("Failed to send signal %d to pid %d", SIGKILL, conf->pid); + } + } + +out: + free(conf); + return ((void *)0); +} + +/* isulad: create attach timeout thread */ +static int create_attach_timeout_thread(int64_t attach_timeout, pid_t pid) +{ + int ret = 0; + pthread_t ptid; + pthread_attr_t attr; + struct attach_timeout_conf *timeout_conf = NULL; + + timeout_conf = malloc(sizeof(struct attach_timeout_conf)); + if (timeout_conf == NULL) { + ERROR("Failed to malloc attach timeout conf"); + ret = -1; + goto out; + } + + memset(timeout_conf, 0, sizeof(struct attach_timeout_conf)); + timeout_conf->timeout = attach_timeout; + timeout_conf->pid = pid; + timeout_conf->start_time = lxc_get_process_startat(pid); + + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + ret = pthread_create(&ptid, &attr, wait_attach_timeout, timeout_conf); + if (ret != 0) { + ERROR("Create attach wait timeout thread failed"); + free(timeout_conf); + goto out; + } + +out: + return ret; +} + +static int attach_signal_handler(int fd, uint32_t events, void *data, + struct lxc_epoll_descr *descr) +{ + int ret; + siginfo_t info; + struct signalfd_siginfo siginfo; + pid_t *pid = data; + + ret = lxc_read_nointr(fd, &siginfo, sizeof(siginfo)); + if (ret < 0) + return log_error(LXC_MAINLOOP_ERROR, "Failed to read signal info from signal file descriptor %d", fd); + + if (ret != sizeof(siginfo)) + return log_error(LXC_MAINLOOP_ERROR, "Unexpected size for struct signalfd_siginfo"); + + /* Check whether init is running. 
*/ + info.si_pid = 0; + ret = waitid(P_PID, *pid, &info, WEXITED | WNOWAIT | WNOHANG); + if (ret == 0 && info.si_pid == *pid) { + return log_error(LXC_MAINLOOP_CLOSE, "Container attach init process %d exited", *pid); + } + + return LXC_MAINLOOP_CONTINUE; +} + +static int isulad_setup_signal_fd(sigset_t *oldmask) +{ + int ret; + sigset_t mask; + const int signals[] = {SIGBUS, SIGILL, SIGSEGV, SIGWINCH}; + + /* Block everything except serious error signals. */ + ret = sigfillset(&mask); + if (ret < 0) + return -EBADF; + + for (int sig = 0; sig < (sizeof(signals) / sizeof(signals[0])); sig++) { + ret = sigdelset(&mask, signals[sig]); + if (ret < 0) + return -EBADF; + } + + ret = pthread_sigmask(SIG_BLOCK, &mask, oldmask); + if (ret < 0) + return log_error_errno(-EBADF, errno, + "Failed to set signal mask"); + + ret = signalfd(-1, &mask, SFD_CLOEXEC); + if (ret < 0) + return log_error_errno(-EBADF, + errno, "Failed to create signal file descriptor"); + + TRACE("Created signal file descriptor %d", ret); + + return ret; +} + +int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, + void *exec_payload, lxc_attach_options_t *options, + pid_t *attached_process, char **err_msg) +#else int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, void *exec_payload, lxc_attach_options_t *options, pid_t *attached_process) +#endif { int i, ret, status; int ipc_sockets[2]; @@ -966,6 +1274,13 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, struct lxc_conf *conf; char *name, *lxcpath; struct attach_clone_payload payload = {0}; +#ifdef HAVE_ISULAD + struct lxc_exec_command_handler exec_command; + const char *suffix = options->suffix; + + exec_command.maincmd_fd = -1; + exec_command.terminal = &terminal; +#endif ret = access("/proc/self/ns", X_OK); if (ret) @@ -1014,8 +1329,14 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, } } conf = init_ctx->container->lxc_conf; - 
if (!conf) - return log_error_errno(-EINVAL, EINVAL, "Missing container confifg"); + +#ifdef HAVE_ISULAD + // always switch uid and gid for attach + if (options->uid == -1) + options->uid = init_ctx->container->lxc_conf->init_uid; + if (options->gid == -1) + options->gid = init_ctx->container->lxc_conf->init_gid; +#endif if (!fetch_seccomp(init_ctx->container, options)) WARN("Failed to get seccomp policy"); @@ -1090,7 +1411,7 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, } if (options->attach_flags & LXC_ATTACH_TERMINAL) { - ret = lxc_attach_terminal(conf, &terminal); + ret = lxc_attach_terminal(conf, &terminal, options); if (ret < 0) { ERROR("Failed to setup new terminal"); free(cwd); @@ -1099,6 +1420,12 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, } terminal.log_fd = options->log_fd; +#ifdef HAVE_ISULAD + if (suffix != NULL) { + exec_command.maincmd_fd = lxc_exec_cmd_init(name, lxcpath, suffix); + exec_command.terminal = &terminal; + } +#endif } else { lxc_terminal_init(&terminal); } @@ -1139,10 +1466,38 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); if (ret < 0) { SYSERROR("Could not set up required IPC mechanism for attaching"); +#ifdef HAVE_ISULAD + if (options->attach_flags & LXC_ATTACH_TERMINAL) { + lxc_terminal_delete(&terminal); + lxc_terminal_conf_free(&terminal); + if (exec_command.maincmd_fd != -1) { + close(exec_command.maincmd_fd); + } + } +#endif + free(cwd); + lxc_proc_put_context_info(init_ctx); + return -1; + } + +#ifdef HAVE_ISULAD + /* isulad: pipdfd for get error message of child or grandchild process. 
*/ + if (pipe2(conf->errpipe, O_CLOEXEC) != 0) { + SYSERROR("Failed to init errpipe"); + if (options->attach_flags & LXC_ATTACH_TERMINAL) { + lxc_terminal_delete(&terminal); + lxc_terminal_conf_free(&terminal); + if (exec_command.maincmd_fd != -1) { + close(exec_command.maincmd_fd); + } + } + close(ipc_sockets[0]); + close(ipc_sockets[1]); free(cwd); lxc_proc_put_context_info(init_ctx); return -1; } +#endif /* Create intermediate subprocess, two reasons: * 1. We can't setns() in the child itself, since we want to make @@ -1154,6 +1509,17 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, pid = fork(); if (pid < 0) { SYSERROR("Failed to create first subprocess"); +#ifdef HAVE_ISULAD + if (options->attach_flags & LXC_ATTACH_TERMINAL) { + lxc_terminal_delete(&terminal); + lxc_terminal_conf_free(&terminal); + if (exec_command.maincmd_fd != -1) { + close(exec_command.maincmd_fd); + } + } + close(ipc_sockets[0]); + close(ipc_sockets[1]); +#endif free(cwd); lxc_proc_put_context_info(init_ctx); return -1; @@ -1163,13 +1529,38 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, int ret_parent = -1; pid_t to_cleanup_pid = pid; struct lxc_epoll_descr descr = {0}; +#ifdef HAVE_ISULAD + int isulad_sigfd; + sigset_t isulad_oldmask; + struct lxc_epoll_descr isulad_descr = {0}; +#endif /* close unneeded file descriptors */ close(ipc_sockets[1]); free(cwd); +#ifdef HAVE_ISULAD + /* isulad: close errpipe */ + close(conf->errpipe[1]); + conf->errpipe[1] = -1; + /* isulad: close pipe after clone */ + if (terminal.pipes[0][0] >= 0) { + close(terminal.pipes[0][0]); + terminal.pipes[0][0] = -1; + } + + if (terminal.pipes[1][1] >= 0) { + close(terminal.pipes[1][1]); + terminal.pipes[1][1] = -1; + } + + if (terminal.pipes[2][1] >= 0) { + close(terminal.pipes[2][1]); + terminal.pipes[2][1] = -1; + } +#endif lxc_proc_close_ns_fd(init_ctx); if (options->attach_flags & LXC_ATTACH_TERMINAL) - 
lxc_attach_terminal_close_pts(&terminal); + lxc_attach_terminal_close_slave(&terminal); /* Attach to cgroup, if requested. */ if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) { @@ -1200,7 +1591,11 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, /* Setup resource limits */ if (!lxc_list_empty(&conf->limits)) { +#ifdef HAVE_ISULAD + ret = setup_resource_limits(&conf->limits, pid, -1); +#else ret = setup_resource_limits(&conf->limits, pid); +#endif if (ret < 0) goto on_error; } @@ -1210,9 +1605,28 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, if (ret < 0) goto on_error; +#ifdef HAVE_ISULAD + ret = lxc_attach_terminal_mainloop_init(&terminal, &isulad_descr); + if (ret < 0) + goto on_error; + + if (suffix != NULL) { + (void)lxc_exec_cmd_mainloop_add(&descr, &exec_command); + } +#endif TRACE("Initialized terminal mainloop"); } +#ifdef HAVE_ISULAD + /* The signal fd has to be created before forking otherwise if the child + * process exits before we setup the signal fd, the event will be lost + * and the command will be stuck. + */ + isulad_sigfd = isulad_setup_signal_fd(&isulad_oldmask); + if (isulad_sigfd < 0) + goto close_mainloop; +#endif + /* Let the child process know to go ahead. 
*/ status = 0; ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status)); @@ -1273,7 +1687,7 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, TRACE("Sent LSM label file descriptor %d to child", labelfd); } - if (conf->seccomp.seccomp) { + if (conf && conf->seccomp.seccomp) { ret = lxc_seccomp_recv_notifier_fd(&conf->seccomp, ipc_sockets[0]); if (ret < 0) goto close_mainloop; @@ -1290,6 +1704,34 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, *attached_process = attached_pid; +#ifdef HAVE_ISULAD + if (options->timeout > 0) { + ret = create_attach_timeout_thread(options->timeout, *attached_process); + if (ret) { + ERROR("Failed to create attach timeout thread for container."); + goto close_mainloop; + } + } + /* isulad: read error msg from pipe */ + ssize_t size_read; + char errbuf[BUFSIZ + 1] = {0}; + pid_t tmp_pid = *attached_process; + + size_read = read(conf->errpipe[0], errbuf, BUFSIZ); + if (size_read > 0) { + if (err_msg) + *err_msg = safe_strdup(errbuf); + goto close_mainloop; + } + if (options->attach_flags & LXC_ATTACH_TERMINAL) { + ret = lxc_mainloop_add_handler(&descr, isulad_sigfd, attach_signal_handler, &tmp_pid); + if (ret < 0) { + ERROR("Failed to add signal handler for %d to mainloop", tmp_pid); + goto close_mainloop; + } + } +#endif + /* Now shut down communication with child, we're done. 
*/ shutdown(ipc_sockets[0], SHUT_RDWR); close(ipc_sockets[0]); @@ -1298,6 +1740,15 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, ret_parent = 0; to_cleanup_pid = -1; + #ifdef HAVE_ISULAD + // iSulad: close stdin pipe if we do not want open_stdin with container stdin + if (!terminal.open_stdin) { + if (terminal.pipes[0][1] > 0) { + close(terminal.pipes[0][1]); + terminal.pipes[0][1] = -1; + } + } + #endif if (options->attach_flags & LXC_ATTACH_TERMINAL) { ret = lxc_mainloop(&descr, -1); if (ret < 0) { @@ -1306,9 +1757,20 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, } } +#ifdef HAVE_ISULAD + // do lxc_mainloop to make sure we do not lose any output + (void)lxc_mainloop(&isulad_descr, 100); + if (g_attach_timeout_state == ATTACH_TIMEOUT && err_msg != NULL && *err_msg == NULL) { + *err_msg = safe_strdup("Attach exceeded timeout"); + } +#endif close_mainloop: - if (options->attach_flags & LXC_ATTACH_TERMINAL) + if (options->attach_flags & LXC_ATTACH_TERMINAL) { +#ifdef HAVE_ISULAD + lxc_mainloop_close(&isulad_descr); +#endif lxc_mainloop_close(&descr); + } on_error: if (ipc_sockets[0] >= 0) { @@ -1322,6 +1784,11 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, if (options->attach_flags & LXC_ATTACH_TERMINAL) { lxc_terminal_delete(&terminal); lxc_terminal_conf_free(&terminal); +#ifdef HAVE_ISULAD + if (exec_command.maincmd_fd != -1) { + close(exec_command.maincmd_fd); + } +#endif } lxc_proc_put_context_info(init_ctx); @@ -1331,10 +1798,21 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, /* close unneeded file descriptors */ close_prot_errno_disarm(ipc_sockets[0]); +#ifdef HAVE_ISULAD + /* isulad: close errpipe */ + close(conf->errpipe[0]); + conf->errpipe[0] = -1; +#endif + if (options->attach_flags & LXC_ATTACH_TERMINAL) { - lxc_attach_terminal_close_ptmx(&terminal); + lxc_attach_terminal_close_master(&terminal); 
lxc_attach_terminal_close_peer(&terminal); lxc_attach_terminal_close_log(&terminal); +#ifdef HAVE_ISULAD + if (exec_command.maincmd_fd != -1) { + close(exec_command.maincmd_fd); + } +#endif } /* Wait for the parent to have setup cgroups. */ @@ -1377,9 +1855,12 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, payload.ipc_socket = ipc_sockets[1]; payload.options = options; payload.init_ctx = init_ctx; - payload.terminal_pts_fd = terminal.pts; + payload.terminal_slave_fd = terminal.slave; payload.exec_function = exec_function; payload.exec_payload = exec_payload; +#ifdef HAVE_ISULAD + payload.terminal = &terminal; +#endif pid = lxc_raw_clone(CLONE_PARENT, NULL); if (pid < 0) { @@ -1390,7 +1871,7 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, } if (pid == 0) { - if (options->attach_flags & LXC_ATTACH_TERMINAL) { + if (options->attach_flags & LXC_ATTACH_TERMINAL && terminal.tty_state) { ret = pthread_sigmask(SIG_SETMASK, &terminal.tty_state->oldmask, NULL); if (ret < 0) { @@ -1406,9 +1887,9 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, _exit(EXIT_FAILURE); } - if (options->attach_flags & LXC_ATTACH_TERMINAL) - lxc_attach_terminal_close_pts(&terminal); - + if (options->attach_flags & LXC_ATTACH_TERMINAL) { + lxc_attach_terminal_close_slave(&terminal); + } /* Tell grandparent the pid of the pid of the newly created child. 
*/ ret = lxc_write_nointr(ipc_sockets[1], &pid, sizeof(pid)); if (ret != sizeof(pid)) { @@ -1430,7 +1911,11 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, _exit(EXIT_SUCCESS); } +#ifdef HAVE_ISULAD +int lxc_attach_run_command(void *payload, int msg_fd) +#else int lxc_attach_run_command(void *payload) +#endif { int ret = -1; lxc_attach_command_t *cmd = payload; @@ -1446,11 +1931,19 @@ int lxc_attach_run_command(void *payload) break; } } +#ifdef HAVE_ISULAD + /* isulad: write error messages */ + lxc_write_error_message(msg_fd, "exec: \"%s\": %s.", cmd->program, strerror(errno)); +#endif return log_error_errno(ret, errno, "Failed to exec \"%s\"", cmd->program); } +#ifdef HAVE_ISULAD +int lxc_attach_run_shell(void* payload, int msg_fd) +#else int lxc_attach_run_shell(void* payload) +#endif { __do_free char *buf = NULL; uid_t uid; diff --git a/src/lxc/attach.h b/src/lxc/attach.h index ef5a6c19c..831634424 100644 --- a/src/lxc/attach.h +++ b/src/lxc/attach.h @@ -20,9 +20,15 @@ struct lxc_proc_context_info { int ns_fd[LXC_NS_MAX]; }; +#ifdef HAVE_ISULAD +extern int lxc_attach(struct lxc_container *container, + lxc_attach_exec_t exec_function, void *exec_payload, + lxc_attach_options_t *options, pid_t *attached_process, char **err_msg); +#else extern int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, void *exec_payload, lxc_attach_options_t *options, pid_t *attached_process); +#endif extern int lxc_attach_remount_sys_proc(void); diff --git a/src/lxc/attach_options.h b/src/lxc/attach_options.h index 63e62d4ff..5767560fe 100644 --- a/src/lxc/attach_options.h +++ b/src/lxc/attach_options.h @@ -26,7 +26,7 @@ enum { /* The following are off by default: */ LXC_ATTACH_REMOUNT_PROC_SYS = 0x00010000, /*!< Remount /proc filesystem */ - LXC_ATTACH_LSM_NOW = 0x00020000, /*!< TODO: currently unused */ + LXC_ATTACH_LSM_NOW = 0x00020000, /*!< FIXME: unknown */ /* Set PR_SET_NO_NEW_PRIVS to block execve() gainable 
privileges. */ LXC_ATTACH_NO_NEW_PRIVS = 0x00040000, /*!< PR_SET_NO_NEW_PRIVS */ LXC_ATTACH_TERMINAL = 0x00080000, /*!< Allocate new terminal for attached process. */ @@ -49,7 +49,11 @@ enum { * * \return Function should return \c 0 on success, and any other value to denote failure. */ +#ifdef HAVE_ISULAD +typedef int (*lxc_attach_exec_t)(void* payload, int msg_fd); +#else typedef int (*lxc_attach_exec_t)(void* payload); +#endif /*! * LXC attach options for \ref lxc_container \c attach(). @@ -113,6 +117,12 @@ typedef struct lxc_attach_options_t { /*! File descriptor to log output. */ int log_fd; + + char *init_fifo[3]; /* isulad: default fifos for the start */ + int64_t timeout;/* isulad: Seconds for waiting on a container to attach/exec before it is killed*/ + const char *suffix; + bool disable_pty; + bool open_stdin; } lxc_attach_options_t; /*! Default attach options to use */ @@ -131,6 +141,7 @@ typedef struct lxc_attach_options_t { /* .stdout_fd = */ 1, \ /* .stderr_fd = */ 2, \ /* .log_fd = */ -EBADF, \ + /* .init_fifo = */ {NULL, NULL, NULL}, \ } /*! @@ -148,7 +159,11 @@ typedef struct lxc_attach_command_t { * * \return \c -1 on error, exit code of lxc_attach_command_t program on success. */ +#ifdef HAVE_ISULAD +extern int lxc_attach_run_command(void* payload, int msg_fd); +#else extern int lxc_attach_run_command(void* payload); +#endif /*! * \brief Run a shell command in the container. @@ -157,7 +172,11 @@ extern int lxc_attach_run_command(void* payload); * * \return Exit code of shell. 
*/ +#ifdef HAVE_ISULAD +extern int lxc_attach_run_shell(void* payload, int msg_fd); +#else extern int lxc_attach_run_shell(void* payload); +#endif #ifdef __cplusplus } diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c index 603940683..4a0961f13 100644 --- a/src/lxc/cgroups/cgfsng.c +++ b/src/lxc/cgroups/cgfsng.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include @@ -215,6 +214,7 @@ static char *read_file(const char *fnam) return move_ptr(buf); } +#ifndef HAVE_ISULAD /* Taken over modified from the kernel sources. */ #define NBITS 32 /* bits in uint32_t */ #define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d)) @@ -477,12 +477,14 @@ static bool copy_parent_file(const char *parent_cgroup, value, child_cgroup, file); return true; } +#endif static inline bool is_unified_hierarchy(const struct hierarchy *h) { return h->version == CGROUP2_SUPER_MAGIC; } +#ifndef HAVE_ISULAD /* * Initialize the cpuset hierarchy in first directory of @cgroup_leaf and set * cgroup.clone_children so that children inherit settings. Since the @@ -562,6 +564,7 @@ static int cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h, return fret; } +#endif /* Given two null-terminated lists of strings, return true if any string is in * both. 
@@ -673,7 +676,7 @@ static char **cg_hybrid_get_controllers(char **klist, char **nlist, char *line, if (!dup) return NULL; - lxc_iterate_parts(tok, dup, sep) + lxc_iterate_parts (tok, dup, sep) must_append_controller(klist, nlist, &aret, tok); } *p2 = ' '; @@ -726,7 +729,6 @@ static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char new->container_base_path = container_base_path; new->version = type; new->cgfd_con = -EBADF; - new->cgfd_limit = -EBADF; new->cgfd_mon = -EBADF; newentry = append_null_to_list((void ***)h); @@ -948,6 +950,115 @@ static void lxc_cgfsng_print_basecg_debuginfo(char *basecginfo, char **klist, TRACE("named subsystem %d: %s", k, *it); } +struct generic_userns_exec_data { + struct hierarchy **hierarchies; + const char *container_cgroup; + struct lxc_conf *conf; + uid_t origuid; /* target uid in parent namespace */ + char *path; +}; + +#ifdef HAVE_ISULAD + +static int isulad_cgroup_tree_remove(struct hierarchy **hierarchies, + const char *container_cgroup) +{ + if (!container_cgroup || !hierarchies) + return 0; + + for (int i = 0; hierarchies[i]; i++) { + struct hierarchy *h = hierarchies[i]; + int ret; + + if (!h->container_full_path) { + h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, container_cgroup, NULL); + } + + ret = lxc_rm_rf(h->container_full_path); + if (ret < 0) { + SYSERROR("Failed to destroy \"%s\"", h->container_full_path); + return -1; + } + + free_disarm(h->container_full_path); + } + + return 0; +} + +static int isulad_cgroup_tree_remove_wrapper(void *data) +{ + struct generic_userns_exec_data *arg = data; + uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid; + gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 
0 : arg->conf->init_gid; + int ret; + + if (!lxc_setgroups(0, NULL) && errno != EPERM) + return log_error_errno(-1, errno, "Failed to setgroups(0, NULL)"); + + ret = setresgid(nsgid, nsgid, nsgid); + if (ret < 0) + return log_error_errno(-1, errno, "Failed to setresgid(%d, %d, %d)", + (int)nsgid, (int)nsgid, (int)nsgid); + + ret = setresuid(nsuid, nsuid, nsuid); + if (ret < 0) + return log_error_errno(-1, errno, "Failed to setresuid(%d, %d, %d)", + (int)nsuid, (int)nsuid, (int)nsuid); + + return isulad_cgroup_tree_remove(arg->hierarchies, arg->container_cgroup); +} + +__cgfsng_ops static bool isulad_cgfsng_payload_destroy(struct cgroup_ops *ops, + struct lxc_handler *handler) +{ + int ret; + + if (!ops) { + ERROR("Called with uninitialized cgroup operations"); + return false; + } + + if (!ops->hierarchies) { + return false; + } + + if (!handler) { + ERROR("Called with uninitialized handler"); + return false; + } + + if (!handler->conf) { + ERROR("Called with uninitialized conf"); + return false; + } + +#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX + ret = bpf_program_cgroup_detach(handler->conf->cgroup2_devices); + if (ret < 0) + WARN("Failed to detach bpf program from cgroup"); +#endif + + if (handler->conf && !lxc_list_empty(&handler->conf->id_map)) { + struct generic_userns_exec_data wrap = { + .conf = handler->conf, + .container_cgroup = ops->container_cgroup, + .hierarchies = ops->hierarchies, + .origuid = 0, + }; + ret = userns_exec_1(handler->conf, isulad_cgroup_tree_remove_wrapper, + &wrap, "cgroup_tree_remove_wrapper"); + } else { + ret = isulad_cgroup_tree_remove(ops->hierarchies, ops->container_cgroup); + } + if (ret < 0) { + SYSWARN("Failed to destroy cgroups"); + return false; + } + + return true; +} +#else static int cgroup_tree_remove(struct hierarchy **hierarchies, const char *container_cgroup) { @@ -958,29 +1069,19 @@ static int cgroup_tree_remove(struct hierarchy **hierarchies, struct hierarchy *h = hierarchies[i]; int ret; - if (!h->container_limit_path) 
+ if (!h->container_full_path) continue; - ret = lxc_rm_rf(h->container_limit_path); + ret = lxc_rm_rf(h->container_full_path); if (ret < 0) - WARN("Failed to destroy \"%s\"", h->container_limit_path); + WARN("Failed to destroy \"%s\"", h->container_full_path); - if (h->container_limit_path != h->container_full_path) - free_disarm(h->container_limit_path); free_disarm(h->container_full_path); } return 0; } -struct generic_userns_exec_data { - struct hierarchy **hierarchies; - const char *container_cgroup; - struct lxc_conf *conf; - uid_t origuid; /* target uid in parent namespace */ - char *path; -}; - static int cgroup_tree_remove_wrapper(void *data) { struct generic_userns_exec_data *arg = data; @@ -1048,7 +1149,15 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops, if (ret < 0) SYSWARN("Failed to destroy cgroups"); } +#endif +#ifdef HAVE_ISULAD +__cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops, + struct lxc_handler *handler) +{ + return; +} +#else __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops, struct lxc_handler *handler) { @@ -1082,7 +1191,6 @@ __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops, for (int i = 0; ops->hierarchies[i]; i++) { __do_free char *pivot_path = NULL; struct hierarchy *h = ops->hierarchies[i]; - size_t offset; int ret; if (!h->monitor_full_path) @@ -1094,21 +1202,16 @@ __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops, goto try_lxc_rm_rf; } - if (conf && conf->cgroup_meta.monitor_dir) - pivot_path = must_make_path(h->mountpoint, h->container_base_path, - conf->cgroup_meta.monitor_dir, CGROUP_PIVOT, NULL); - else if (conf && conf->cgroup_meta.dir) - pivot_path = must_make_path(h->mountpoint, h->container_base_path, - conf->cgroup_meta.dir, CGROUP_PIVOT, NULL); + if (conf && conf->cgroup_meta.dir) + pivot_path = must_make_path(h->mountpoint, + h->container_base_path, + conf->cgroup_meta.dir, + CGROUP_PIVOT, NULL); else - pivot_path 
= must_make_path(h->mountpoint, h->container_base_path, + pivot_path = must_make_path(h->mountpoint, + h->container_base_path, CGROUP_PIVOT, NULL); - offset = strlen(h->mountpoint) + strlen(h->container_base_path); - - if (cg_legacy_handle_cpuset_hierarchy(h, pivot_path + offset)) - SYSWARN("Failed to initialize cpuset %s/" CGROUP_PIVOT, pivot_path); - ret = mkdir_p(pivot_path, 0755); if (ret < 0 && errno != EEXIST) { ERROR("Failed to create %s", pivot_path); @@ -1127,6 +1230,15 @@ try_lxc_rm_rf: WARN("Failed to destroy \"%s\"", h->monitor_full_path); } } +#endif + +#ifdef HAVE_ISULAD +__cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, + struct lxc_handler *handler) +{ + return true; +} +#else static int mkdir_eexist_on_last(const char *dir, mode_t mode) { @@ -1150,18 +1262,16 @@ static int mkdir_eexist_on_last(const char *dir, mode_t mode) ret = mkdir(makeme, mode); if (ret < 0 && ((errno != EEXIST) || (orig_len == cur_len))) - return log_warn_errno(-1, errno, "Failed to create directory \"%s\"", makeme); + return log_error_errno(-1, errno, "Failed to create directory \"%s\"", makeme); } while (tmp != dir); return 0; } -static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf, - struct hierarchy *h, const char *cgroup_tree, - const char *cgroup_leaf, bool payload, - const char *cgroup_limit_dir) +static bool cgroup_tree_create(struct hierarchy *h, const char *cgroup_tree, + const char *cgroup_leaf, bool payload) { - __do_free char *path = NULL, *limit_path = NULL; + __do_free char *path = NULL; int ret, ret_cpuset; path = must_make_path(h->mountpoint, h->container_base_path, cgroup_leaf, NULL); @@ -1172,37 +1282,6 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf, if (ret_cpuset < 0) return log_error_errno(false, errno, "Failed to handle legacy cpuset controller"); - if (payload && cgroup_limit_dir) { - /* with isolation both parts need to not already exist */ - limit_path = 
must_make_path(h->mountpoint, - h->container_base_path, - cgroup_limit_dir, NULL); - - ret = mkdir_eexist_on_last(limit_path, 0755); - if (ret < 0) - return log_debug_errno(false, - errno, "Failed to create %s limiting cgroup", - limit_path); - - h->cgfd_limit = lxc_open_dirfd(limit_path); - if (h->cgfd_limit < 0) - return log_error_errno(false, errno, - "Failed to open %s", path); - h->container_limit_path = move_ptr(limit_path); - - /* - * With isolation the devices legacy cgroup needs to be - * iinitialized early, as it typically contains an 'a' (all) - * line, which is not possible once a subdirectory has been - * created. - */ - if (string_in_list(h->controllers, "devices")) { - ret = ops->setup_limits_legacy(ops, conf, true); - if (ret < 0) - return ret; - } - } - ret = mkdir_eexist_on_last(path, 0755); if (ret < 0) { /* @@ -1211,7 +1290,7 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf, * directory for us to ensure correct initialization. */ if (ret_cpuset != 1 || cgroup_tree) - return log_debug_errno(false, errno, "Failed to create %s cgroup", path); + return log_error_errno(false, errno, "Failed to create %s cgroup", path); } if (payload) { @@ -1219,10 +1298,6 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf, if (h->cgfd_con < 0) return log_error_errno(false, errno, "Failed to open %s", path); h->container_full_path = move_ptr(path); - if (h->cgfd_limit < 0) - h->cgfd_limit = h->cgfd_con; - if (!h->container_limit_path) - h->container_limit_path = h->container_full_path; } else { h->cgfd_mon = lxc_open_dirfd(path); if (h->cgfd_mon < 0) @@ -1235,15 +1310,11 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf, static void cgroup_tree_leaf_remove(struct hierarchy *h, bool payload) { - __do_free char *full_path = NULL, *__limit_path = NULL; - char *limit_path = NULL; + __do_free char *full_path = NULL; if (payload) { __lxc_unused __do_close int fd = move_fd(h->cgfd_con); 
full_path = move_ptr(h->container_full_path); - limit_path = move_ptr(h->container_limit_path); - if (limit_path != full_path) - __limit_path = limit_path; } else { __lxc_unused __do_close int fd = move_fd(h->cgfd_mon); full_path = move_ptr(h->monitor_full_path); @@ -1251,38 +1322,6 @@ static void cgroup_tree_leaf_remove(struct hierarchy *h, bool payload) if (full_path && rmdir(full_path)) SYSWARN("Failed to rmdir(\"%s\") cgroup", full_path); - if (limit_path && rmdir(limit_path)) - SYSWARN("Failed to rmdir(\"%s\") cgroup", limit_path); -} - -/* - * Check we have no lxc.cgroup.dir, and that lxc.cgroup.dir.limit_prefix is a - * proper prefix directory of lxc.cgroup.dir.payload. - * - * Returns the prefix length if it is set, otherwise zero on success. - */ -static bool check_cgroup_dir_config(struct lxc_conf *conf) -{ - const char *monitor_dir = conf->cgroup_meta.monitor_dir, - *container_dir = conf->cgroup_meta.container_dir, - *namespace_dir = conf->cgroup_meta.namespace_dir; - - /* none of the new options are set, all is fine */ - if (!monitor_dir && !container_dir && !namespace_dir) - return true; - - /* some are set, make sure lxc.cgroup.dir is not also set*/ - if (conf->cgroup_meta.dir) - return log_error_errno(false, EINVAL, - "lxc.cgroup.dir conflicts with lxc.cgroup.dir.payload/monitor"); - - /* make sure both monitor and payload are set */ - if (!monitor_dir || !container_dir) - return log_error_errno(false, EINVAL, - "lxc.cgroup.dir.payload and lxc.cgroup.dir.monitor must both be set"); - - /* namespace_dir may be empty */ - return true; } __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, @@ -1293,7 +1332,7 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, int idx = 0; int i; size_t len; - char *suffix = NULL; + char *suffix; struct lxc_conf *conf; if (!ops) @@ -1310,13 +1349,7 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, conf = handler->conf; - if 
(!check_cgroup_dir_config(conf)) - return false; - - if (conf->cgroup_meta.monitor_dir) { - cgroup_tree = NULL; - monitor_cgroup = strdup(conf->cgroup_meta.monitor_dir); - } else if (conf->cgroup_meta.dir) { + if (conf->cgroup_meta.dir) { cgroup_tree = conf->cgroup_meta.dir; monitor_cgroup = must_concat(&len, conf->cgroup_meta.dir, "/", DEFAULT_MONITOR_CGROUP_PREFIX, @@ -1340,36 +1373,252 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, if (!monitor_cgroup) return ret_set_errno(false, ENOMEM); - if (!conf->cgroup_meta.monitor_dir) { - suffix = monitor_cgroup + len - CGROUP_CREATE_RETRY_LEN; - *suffix = '\0'; - } + suffix = monitor_cgroup + len - CGROUP_CREATE_RETRY_LEN; + *suffix = '\0'; do { - if (idx && suffix) + if (idx) sprintf(suffix, "-%d", idx); for (i = 0; ops->hierarchies[i]; i++) { - if (cgroup_tree_create(ops, handler->conf, - ops->hierarchies[i], cgroup_tree, - monitor_cgroup, false, NULL)) + if (cgroup_tree_create(ops->hierarchies[i], cgroup_tree, monitor_cgroup, false)) continue; - DEBUG("Failed to create cgroup \"%s\"", ops->hierarchies[i]->monitor_full_path ?: "(null)"); + ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->monitor_full_path ?: "(null)"); for (int j = 0; j < i; j++) cgroup_tree_leaf_remove(ops->hierarchies[j], false); idx++; break; } - } while (ops->hierarchies[i] && idx > 0 && idx < 1000 && suffix); + } while (ops->hierarchies[i] && idx > 0 && idx < 1000); - if (idx == 1000 || (!suffix && idx != 0)) - return log_error_errno(false, ERANGE, "Failed to create monitor cgroup"); + if (idx == 1000) + return ret_set_errno(false, ERANGE); ops->monitor_cgroup = move_ptr(monitor_cgroup); return log_info(true, "The monitor process uses \"%s\" as cgroup", ops->monitor_cgroup); } +#endif + +#ifdef HAVE_ISULAD + +static bool isulad_copy_parent_file(char *path, char *file) +{ + int ret; + int len = 0; + char *value = NULL; + char *current = NULL; + char *fpath = NULL; + char *lastslash = NULL; + char oldv; 
+ + fpath = must_make_path(path, file, NULL); + current = read_file(fpath); + + if (current == NULL) { + SYSERROR("Failed to read file \"%s\"", fpath); + free(fpath); + return false; + } + + if (strcmp(current, "\n") != 0) { + free(fpath); + free(current); + return true; + } + + free(fpath); + free(current); + + lastslash = strrchr(path, '/'); + if (lastslash == NULL) { + ERROR("Failed to detect \"/\" in \"%s\"", path); + return false; + } + oldv = *lastslash; + *lastslash = '\0'; + fpath = must_make_path(path, file, NULL); + *lastslash = oldv; + len = lxc_read_from_file(fpath, NULL, 0); + if (len <= 0) + goto on_error; + + value = must_realloc(NULL, len + 1); + ret = lxc_read_from_file(fpath, value, len); + if (ret != len) + goto on_error; + free(fpath); + + fpath = must_make_path(path, file, NULL); + ret = lxc_write_to_file(fpath, value, len, false, 0666); + if (ret < 0) + SYSERROR("Failed to write \"%s\" to file \"%s\"", value, fpath); + free(fpath); + free(value); + return ret >= 0; + +on_error: + SYSERROR("Failed to read file \"%s\"", fpath); + free(fpath); + free(value); + return false; +} + +static bool build_sub_cpuset_cgroup_dir(char *cgpath) +{ + int ret; + + ret = mkdir_p(cgpath, 0755); + if (ret < 0) { + if (errno != EEXIST) { + SYSERROR("Failed to create directory \"%s\"", cgpath); + return false; + } + } + + /* copy parent's settings */ + if (!isulad_copy_parent_file(cgpath, "cpuset.cpus")) { + SYSERROR("Failed to copy \"cpuset.cpus\" settings"); + return false; + } + + /* copy parent's settings */ + if (!isulad_copy_parent_file(cgpath, "cpuset.mems")) { + SYSERROR("Failed to copy \"cpuset.mems\" settings"); + return false; + } + + return true; +} + +static bool isulad_cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h, char *cgname) +{ + char *cgpath, *slash; + bool sub_mk_success = false; + + if (!string_in_list(h->controllers, "cpuset")) + return true; + + cgname += strspn(cgname, "/"); + + slash = strchr(cgname, '/'); + + if (slash != NULL) { + 
while (slash) { + *slash = '\0'; + cgpath = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL); + sub_mk_success = build_sub_cpuset_cgroup_dir(cgpath); + free(cgpath); + *slash = '/'; + if (!sub_mk_success) { + return false; + } + slash = strchr(slash + 1, '/'); + } + } + + cgpath = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL); + sub_mk_success = build_sub_cpuset_cgroup_dir(cgpath); + free(cgpath); + if (!sub_mk_success) { + return false; + } + + return true; +} + +static int isulad_mkdir_eexist_on_last(const char *dir, mode_t mode) +{ + const char *tmp = dir; + const char *orig = dir; + + do { + int ret; + size_t cur_len; + char *makeme; + + dir = tmp + strspn(tmp, "/"); + tmp = dir + strcspn(dir, "/"); + + errno = ENOMEM; + cur_len = dir - orig; + makeme = strndup(orig, cur_len); + if (!makeme) + return -1; + + ret = mkdir(makeme, mode); + if (ret < 0) { + if (errno != EEXIST) { + SYSERROR("Failed to create directory \"%s\"", makeme); + free(makeme); + return -1; + } + } + free(makeme); + + } while (tmp != dir); + + return 0; +} +static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname, int errfd) +{ + int ret; + __do_free char *path = NULL; + + path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL); + + if (file_exists(path)) { // it must not already exist + ERROR("Cgroup path \"%s\" already exist.", path); + lxc_write_error_message(errfd, "%s:%d: Cgroup path \"%s\" already exist.", + __FILE__, __LINE__, path); + return false; + } + + if (!isulad_cg_legacy_handle_cpuset_hierarchy(h, cgname)) { + ERROR("Failed to handle legacy cpuset controller"); + return false; + } + + ret = isulad_mkdir_eexist_on_last(path, 0755); + if (ret < 0) { + ERROR("Failed to create cgroup \"%s\"", path); + return false; + } + + h->cgfd_con = lxc_open_dirfd(path); + if (h->cgfd_con < 0) + return log_error_errno(false, errno, "Failed to open %s", path); + + if (h->container_full_path == NULL) { + 
h->container_full_path = move_ptr(path); + } + + return true; +} + +/* isulad: create hierarchies path, if fail, return the error */ +__cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops, + struct lxc_handler *handler) +{ + int i; + char *container_cgroup = ops->container_cgroup; + + if (!container_cgroup) { + ERROR("cgfsng_create container_cgroup is invalid"); + return false; + } + + for (i = 0; ops->hierarchies[i]; i++) { + if (!create_path_for_hierarchy(ops->hierarchies[i], container_cgroup, ops->errfd)) { + SYSERROR("Failed to create %s", ops->hierarchies[i]->container_full_path); + return false; + } + } + + return true; +} +#else /* * Try to create the same cgroup in all hierarchies. Start with cgroup_pattern; * next cgroup_pattern-1, -2, ..., -999. @@ -1377,14 +1626,12 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops, struct lxc_handler *handler) { - __do_free char *container_cgroup = NULL, - *__cgroup_tree = NULL, - *limiting_cgroup = NULL; + __do_free char *container_cgroup = NULL, *__cgroup_tree = NULL; const char *cgroup_tree; int idx = 0; int i; size_t len; - char *suffix = NULL; + char *suffix; struct lxc_conf *conf; if (!ops) @@ -1401,25 +1648,7 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops, conf = handler->conf; - if (!check_cgroup_dir_config(conf)) - return false; - - if (conf->cgroup_meta.container_dir) { - cgroup_tree = NULL; - - limiting_cgroup = strdup(conf->cgroup_meta.container_dir); - if (!limiting_cgroup) - return ret_set_errno(false, ENOMEM); - - if (conf->cgroup_meta.namespace_dir) { - container_cgroup = must_make_path(limiting_cgroup, - conf->cgroup_meta.namespace_dir, - NULL); - } else { - /* explicit paths but without isolation */ - container_cgroup = move_ptr(limiting_cgroup); - } - } else if (conf->cgroup_meta.dir) { + if (conf->cgroup_meta.dir) { cgroup_tree = 
conf->cgroup_meta.dir; container_cgroup = must_concat(&len, cgroup_tree, "/", DEFAULT_PAYLOAD_CGROUP_PREFIX, @@ -1443,38 +1672,41 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops, if (!container_cgroup) return ret_set_errno(false, ENOMEM); - if (!conf->cgroup_meta.container_dir) { - suffix = container_cgroup + len - CGROUP_CREATE_RETRY_LEN; - *suffix = '\0'; - } + suffix = container_cgroup + len - CGROUP_CREATE_RETRY_LEN; + *suffix = '\0'; do { - if (idx && suffix) + if (idx) sprintf(suffix, "-%d", idx); for (i = 0; ops->hierarchies[i]; i++) { - if (cgroup_tree_create(ops, handler->conf, - ops->hierarchies[i], cgroup_tree, - container_cgroup, true, - limiting_cgroup)) + if (cgroup_tree_create(ops->hierarchies[i], cgroup_tree, container_cgroup, true)) continue; - DEBUG("Failed to create cgroup \"%s\"", ops->hierarchies[i]->container_full_path ?: "(null)"); + ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->container_full_path ?: "(null)"); for (int j = 0; j < i; j++) cgroup_tree_leaf_remove(ops->hierarchies[j], true); idx++; break; } - } while (ops->hierarchies[i] && idx > 0 && idx < 1000 && suffix); + } while (ops->hierarchies[i] && idx > 0 && idx < 1000); - if (idx == 1000 || (!suffix && idx != 0)) - return log_error_errno(false, ERANGE, "Failed to create container cgroup"); + if (idx == 1000) + return ret_set_errno(false, ERANGE); ops->container_cgroup = move_ptr(container_cgroup); INFO("The container process uses \"%s\" as cgroup", ops->container_cgroup); return true; } +#endif +#ifdef HAVE_ISULAD +__cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, + struct lxc_handler *handler) +{ + return true; +} +#else __cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, struct lxc_handler *handler) { @@ -1526,7 +1758,58 @@ __cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, return true; } +#endif +#ifdef HAVE_ISULAD +__cgfsng_ops static bool cgfsng_payload_enter(struct 
cgroup_ops *ops, + struct lxc_handler *handler) +{ + int len; + char pidstr[INTTYPE_TO_STRLEN(pid_t)]; + + if (!ops) + return ret_set_errno(false, ENOENT); + + if (!ops->hierarchies) + return true; + + if (!ops->container_cgroup) + return ret_set_errno(false, ENOENT); + + if (!handler || !handler->conf) + return ret_set_errno(false, EINVAL); + + len = snprintf(pidstr, sizeof(pidstr), "%d", handler->pid); + + for (int i = 0; ops->hierarchies[i]; i++) { + int ret; + char *fullpath; + int retry_count = 0; + int max_retry = 10; + + fullpath = must_make_path(ops->hierarchies[i]->container_full_path, + "cgroup.procs", NULL); +retry: + ret = lxc_write_to_file(fullpath, pidstr, len, false, 0666); + if (ret != 0) { + if (retry_count < max_retry) { + SYSERROR("Failed to enter cgroup \"%s\" with retry count:%d", fullpath, retry_count); + (void)isulad_cg_legacy_handle_cpuset_hierarchy(ops->hierarchies[i], ops->container_cgroup); + (void)isulad_mkdir_eexist_on_last(ops->hierarchies[i]->container_full_path, 0755); + usleep(100 * 1000); /* 100 millisecond */ + retry_count++; + goto retry; + } + SYSERROR("Failed to enter cgroup \"%s\"", fullpath); + free(fullpath); + return false; + } + free(fullpath); + } + + return true; +} +#else __cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops, struct lxc_handler *handler) { @@ -1558,6 +1841,7 @@ __cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops, return true; } +#endif static int fchowmodat(int dirfd, const char *path, uid_t chown_uid, gid_t chown_gid, mode_t chmod_mode) @@ -1805,6 +2089,196 @@ static inline int cg_mount_cgroup_full(int type, struct hierarchy *h, return __cg_mount_direct(type, h, controllerpath); } +#ifdef HAVE_ISULAD +__cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, + struct lxc_handler *handler, + const char *root, int type) +{ + int i, ret; + char *tmpfspath = NULL; + char *systemdpath = NULL; + char *unifiedpath = NULL; + bool has_cgns = false, retval = false, 
wants_force_mount = false; + char **merged = NULL; + + if ((type & LXC_AUTO_CGROUP_MASK) == 0) + return true; + + if (type & LXC_AUTO_CGROUP_FORCE) { + type &= ~LXC_AUTO_CGROUP_FORCE; + wants_force_mount = true; + } + + if (!wants_force_mount) { + if (!lxc_list_empty(&handler->conf->keepcaps)) + wants_force_mount = !in_caplist(CAP_SYS_ADMIN, &handler->conf->keepcaps); + else + wants_force_mount = in_caplist(CAP_SYS_ADMIN, &handler->conf->caps); + } + + has_cgns = cgns_supported(); + if (has_cgns && !wants_force_mount) + return true; + + if (type == LXC_AUTO_CGROUP_NOSPEC) + type = LXC_AUTO_CGROUP_MIXED; + else if (type == LXC_AUTO_CGROUP_FULL_NOSPEC) + type = LXC_AUTO_CGROUP_FULL_MIXED; + + /* Mount tmpfs */ + tmpfspath = must_make_path(root, "/sys/fs/cgroup", NULL); + if (mkdir_p(tmpfspath, 0755) < 0) { + ERROR("Failed to create directory: %s", tmpfspath); + goto on_error; + } + ret = safe_mount(NULL, tmpfspath, "tmpfs", + MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, + "size=10240k,mode=755", root); + if (ret < 0) + goto on_error; + + for (i = 0; ops->hierarchies[i]; i++) { + char *controllerpath = NULL; + char *path2 = NULL; + struct hierarchy *h = ops->hierarchies[i]; + char *controller = strrchr(h->mountpoint, '/'); + + if (!controller) + continue; + controller++; + + // isulad: symlink subcgroup + if (strchr(controller, ',') != NULL) { + int pret; + pret = lxc_append_string(&merged, controller); + if (pret < 0) + goto on_error; + } + + controllerpath = must_make_path(tmpfspath, controller, NULL); + if (dir_exists(controllerpath)) { + free(controllerpath); + continue; + } + + ret = mkdir(controllerpath, 0755); + if (ret < 0) { + SYSERROR("Error creating cgroup path: %s", controllerpath); + free(controllerpath); + goto on_error; + } + + if (has_cgns && wants_force_mount) { + /* If cgroup namespaces are supported but the container + * will not have CAP_SYS_ADMIN after it has started we + * need to mount the cgroups manually. 
+ */ + ret = cg_mount_in_cgroup_namespace(type, h, controllerpath); + free(controllerpath); + if (ret < 0) + goto on_error; + + continue; + } + + ret = cg_mount_cgroup_full(type, h, controllerpath); + if (ret < 0) { + free(controllerpath); + goto on_error; + } + + if (!cg_mount_needs_subdirs(type)) { + free(controllerpath); + continue; + } + + // isulad: ignore ops->container_cgroup so we will not see directory lxc after /sys/fs/cgroup/xxx in container, + // isulad: ignore h->container_base_path so we will not see subgroup of /sys/fs/cgroup/xxx/subgroup in container + path2 = must_make_path(controllerpath, NULL); + ret = mkdir_p(path2, 0755); + if (ret < 0) { + free(controllerpath); + free(path2); + goto on_error; + } + + ret = cg_legacy_mount_controllers(type, h, controllerpath, + path2, ops->container_cgroup); + free(controllerpath); + free(path2); + if (ret < 0) + goto on_error; + } + + // isulad: symlink subcgroup + if (merged) { + char **mc = NULL; + for (mc = merged; *mc; mc++) { + char *token = NULL; + char *copy = must_copy_string(*mc); + lxc_iterate_parts(token, copy, ",") { + int mret; + char *link; + link = must_make_path(tmpfspath, token, NULL); + mret = symlink(*mc, link); + if (mret < 0 && errno != EEXIST) { + SYSERROR("Failed to create link %s for target %s", link, *mc); + free(copy); + free(link); + goto on_error; + } + free(link); + } + free(copy); + } + } + + + // isulad: remount /sys/fs/cgroup to readonly + if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_RO) { + ret = mount(tmpfspath, tmpfspath, "bind", + MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME|MS_RDONLY|MS_BIND|MS_REMOUNT, NULL); + if (ret < 0) { + SYSERROR("Failed to remount /sys/fs/cgroup."); + goto on_error; + } + } + + // isulad: remount /sys/fs/cgroup/systemd to readwrite for system container + if (handler->conf->systemd != NULL && strcmp(handler->conf->systemd, "true") == 0) { + // isulad: don't use the unified hierarchy for the systemd cgroup + unifiedpath = 
must_make_path(root, "/sys/fs/cgroup/unified", NULL); + if (dir_exists(unifiedpath)) { + ret = umount2(unifiedpath, MNT_DETACH); + if (ret < 0) { + SYSERROR("Failed to umount /sys/fs/cgroup/unified."); + goto on_error; + } + } + + systemdpath = must_make_path(root, "/sys/fs/cgroup/systemd", NULL); + ret = mount(systemdpath, systemdpath, "bind", + MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME|MS_BIND|MS_REMOUNT, NULL); + if (ret < 0) { + SYSERROR("Failed to remount /sys/fs/cgroup/systemd."); + goto on_error; + } + } + + retval = true; + +on_error: + free(tmpfspath); + if (systemdpath != NULL) { + free(systemdpath); + } + if (unifiedpath != NULL) { + free(unifiedpath); + } + lxc_free_array((void **)merged, free); + return retval; +} +#else __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, struct lxc_handler *handler, const char *root, int type) @@ -1830,24 +2304,11 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, wants_force_mount = true; } - if (!wants_force_mount) { + if (!wants_force_mount){ if (!lxc_list_empty(&handler->conf->keepcaps)) wants_force_mount = !in_caplist(CAP_SYS_ADMIN, &handler->conf->keepcaps); else wants_force_mount = in_caplist(CAP_SYS_ADMIN, &handler->conf->caps); - - /* - * Most recent distro versions currently have init system that - * do support cgroup2 but do not mount it by default unless - * explicitly told so even if the host is cgroup2 only. That - * means they often will fail to boot. Fix this by pre-mounting - * cgroup2 by default. We will likely need to be doing this a - * few years until all distros have switched over to cgroup2 at - * which point we can safely assume that their init systems - * will mount it themselves. - */ - if (pure_unified_layout(ops)) - wants_force_mount = true; } has_cgns = cgns_supported(); @@ -1930,6 +2391,7 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, return true; } +#endif /* Only root needs to escape to the cgroup of its init. 
*/ __cgfsng_ops static bool cgfsng_escape(const struct cgroup_ops *ops, @@ -2046,14 +2508,78 @@ static int freezer_cgroup_events_cb(int fd, uint32_t events, void *cbdata, return LXC_MAINLOOP_CONTINUE; } -static int cg_unified_freeze_do(struct cgroup_ops *ops, int timeout, - const char *state_string, - int state_num, - const char *epoll_error, - const char *wait_error) +static int cg_unified_freeze(struct cgroup_ops *ops, int timeout) +{ + __do_close int fd = -EBADF; + call_cleaner(lxc_mainloop_close) struct lxc_epoll_descr *descr_ptr = NULL; + int ret; + struct lxc_epoll_descr descr; + struct hierarchy *h; + + h = ops->unified; + if (!h) + return ret_set_errno(-1, ENOENT); + + if (!h->container_full_path) + return ret_set_errno(-1, EEXIST); + + if (timeout != 0) { + __do_free char *events_file = NULL; + + events_file = must_make_path(h->container_full_path, "cgroup.events", NULL); + fd = open(events_file, O_RDONLY | O_CLOEXEC); + if (fd < 0) + return log_error_errno(-1, errno, "Failed to open cgroup.events file"); + + ret = lxc_mainloop_open(&descr); + if (ret) + return log_error_errno(-1, errno, "Failed to create epoll instance to wait for container freeze"); + + /* automatically cleaned up now */ + descr_ptr = &descr; + + ret = lxc_mainloop_add_handler(&descr, fd, freezer_cgroup_events_cb, INT_TO_PTR((int){1})); + if (ret < 0) + return log_error_errno(-1, errno, "Failed to add cgroup.events fd handler to mainloop"); + } + + ret = lxc_write_openat(h->container_full_path, "cgroup.freeze", "1", 1); + if (ret < 0) + return log_error_errno(-1, errno, "Failed to open cgroup.freeze file"); + + if (timeout != 0 && lxc_mainloop(&descr, timeout)) + return log_error_errno(-1, errno, "Failed to wait for container to be frozen"); + + return 0; +} + +__cgfsng_ops static int cgfsng_freeze(struct cgroup_ops *ops, int timeout) +{ + if (!ops->hierarchies) + return ret_set_errno(-1, ENOENT); + + if (ops->cgroup_layout != CGROUP_LAYOUT_UNIFIED) + return cg_legacy_freeze(ops); + + 
return cg_unified_freeze(ops, timeout); +} + +static int cg_legacy_unfreeze(struct cgroup_ops *ops) +{ + struct hierarchy *h; + + h = get_hierarchy(ops, "freezer"); + if (!h) + return ret_set_errno(-1, ENOENT); + + return lxc_write_openat(h->container_full_path, "freezer.state", + "THAWED", STRLITERALLEN("THAWED")); +} + +static int cg_unified_unfreeze(struct cgroup_ops *ops, int timeout) { __do_close int fd = -EBADF; - call_cleaner(lxc_mainloop_close) struct lxc_epoll_descr *descr_ptr = NULL; + call_cleaner(lxc_mainloop_close)struct lxc_epoll_descr *descr_ptr = NULL; int ret; struct lxc_epoll_descr descr; struct hierarchy *h; @@ -2075,63 +2601,26 @@ static int cg_unified_freeze_do(struct cgroup_ops *ops, int timeout, ret = lxc_mainloop_open(&descr); if (ret) - return log_error_errno(-1, errno, "%s", epoll_error); + return log_error_errno(-1, errno, "Failed to create epoll instance to wait for container unfreeze"); /* automatically cleaned up now */ descr_ptr = &descr; - ret = lxc_mainloop_add_handler_events(&descr, fd, EPOLLPRI, freezer_cgroup_events_cb, INT_TO_PTR(state_num)); + ret = lxc_mainloop_add_handler(&descr, fd, freezer_cgroup_events_cb, INT_TO_PTR((int){0})); if (ret < 0) return log_error_errno(-1, errno, "Failed to add cgroup.events fd handler to mainloop"); } - ret = lxc_write_openat(h->container_full_path, "cgroup.freeze", state_string, 1); + ret = lxc_write_openat(h->container_full_path, "cgroup.freeze", "0", 1); if (ret < 0) return log_error_errno(-1, errno, "Failed to open cgroup.freeze file"); if (timeout != 0 && lxc_mainloop(&descr, timeout)) - return log_error_errno(-1, errno, "%s", wait_error); + return log_error_errno(-1, errno, "Failed to wait for container to be unfrozen"); return 0; } -static int cg_unified_freeze(struct cgroup_ops *ops, int timeout) -{ - return cg_unified_freeze_do(ops, timeout, "1", 1, - "Failed to create epoll instance to wait for container freeze", - "Failed to wait for container to be frozen"); -} - -__cgfsng_ops 
static int cgfsng_freeze(struct cgroup_ops *ops, int timeout) -{ - if (!ops->hierarchies) - return ret_set_errno(-1, ENOENT); - - if (ops->cgroup_layout != CGROUP_LAYOUT_UNIFIED) - return cg_legacy_freeze(ops); - - return cg_unified_freeze(ops, timeout); -} - -static int cg_legacy_unfreeze(struct cgroup_ops *ops) -{ - struct hierarchy *h; - - h = get_hierarchy(ops, "freezer"); - if (!h) - return ret_set_errno(-1, ENOENT); - - return lxc_write_openat(h->container_full_path, "freezer.state", - "THAWED", STRLITERALLEN("THAWED")); -} - -static int cg_unified_unfreeze(struct cgroup_ops *ops, int timeout) -{ - return cg_unified_freeze_do(ops, timeout, "0", 0, - "Failed to create epoll instance to wait for container unfreeze", - "Failed to wait for container to be unfrozen"); -} - __cgfsng_ops static int cgfsng_unfreeze(struct cgroup_ops *ops, int timeout) { if (!ops->hierarchies) @@ -2143,8 +2632,8 @@ __cgfsng_ops static int cgfsng_unfreeze(struct cgroup_ops *ops, int timeout) return cg_unified_unfreeze(ops, timeout); } -static const char *cgfsng_get_cgroup_do(struct cgroup_ops *ops, - const char *controller, bool limiting) +__cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops, + const char *controller) { struct hierarchy *h; @@ -2153,27 +2642,33 @@ static const char *cgfsng_get_cgroup_do(struct cgroup_ops *ops, return log_warn_errno(NULL, ENOENT, "Failed to find hierarchy for controller \"%s\"", controller ? controller : "(null)"); - if (limiting) - return h->container_limit_path - ? h->container_limit_path + strlen(h->mountpoint) - : NULL; +#ifdef HAVE_ISULAD + if (!h->container_full_path) + h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, ops->container_cgroup, NULL); +#endif return h->container_full_path ? 
h->container_full_path + strlen(h->mountpoint) : NULL; } -__cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops, +#ifdef HAVE_ISULAD +__cgfsng_ops static const char *cgfsng_get_cgroup_full_path(struct cgroup_ops *ops, const char *controller) { - return cgfsng_get_cgroup_do(ops, controller, false); -} + struct hierarchy *h; -__cgfsng_ops static const char *cgfsng_get_limiting_cgroup(struct cgroup_ops *ops, - const char *controller) -{ - return cgfsng_get_cgroup_do(ops, controller, true); + h = get_hierarchy(ops, controller); + if (!h) + return log_warn_errno(NULL, ENOENT, "Failed to find hierarchy for controller \"%s\"", + controller ? controller : "(null)"); + + if (!h->container_full_path) + h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, ops->container_cgroup, NULL); + + return h->container_full_path; } +#endif /* Given a cgroup path returned from lxc_cmd_get_cgroup_path, build a full path, * which must be freed by the caller. @@ -2481,6 +2976,44 @@ __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops, return true; } +#ifdef HAVE_ISULAD +__cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename, + char *value, size_t len, const char *name, + const char *lxcpath) +{ + int ret = -1; + size_t controller_len; + char *controller, *p, *path; + struct hierarchy *h; + + controller_len = strlen(filename); + controller = alloca(controller_len + 1); + (void)strlcpy(controller, filename, controller_len + 1); + + p = strchr(controller, '.'); + if (p) + *p = '\0'; + + const char *ori_path = ops->get_cgroup(ops, controller); + if (ori_path == NULL) { + ERROR("Failed to get cgroup path:%s", controller); + return -1; + } + path = safe_strdup(ori_path); + + h = get_hierarchy(ops, controller); + if (h) { + char *fullpath; + + fullpath = build_full_cgpath_from_monitorpath(h, path, filename); + ret = lxc_read_from_file(fullpath, value, len); + free(fullpath); + } + free(path); + + return ret; +} +#else 
/* Called externally (i.e. from 'lxc-cgroup') to query cgroup limits. Here we * don't have a cgroup_data set up, so we ask the running container through the * commands API for the cgroup path. @@ -2503,7 +3036,7 @@ __cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename, if (p) *p = '\0'; - path = lxc_cmd_get_limiting_cgroup_path(name, lxcpath, controller); + path = lxc_cmd_get_cgroup_path(name, lxcpath, controller); /* not running */ if (!path) return -1; @@ -2518,6 +3051,7 @@ __cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename, return ret; } +#endif static int device_cgroup_parse_access(struct device_item *device, const char *val) { @@ -2544,7 +3078,7 @@ static int device_cgroup_parse_access(struct device_item *device, const char *va return 0; } -static int device_cgroup_rule_parse(struct device_item *device, const char *key, +int device_cgroup_rule_parse(struct device_item *device, const char *key, const char *val) { int count, ret; @@ -2631,6 +3165,44 @@ static int device_cgroup_rule_parse(struct device_item *device, const char *key, return device_cgroup_parse_access(device, ++val); } +#ifdef HAVE_ISULAD +__cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops, + const char *filename, const char *value, + const char *name, const char *lxcpath) +{ + int ret = -1; + size_t controller_len; + char *controller, *p, *path; + struct hierarchy *h; + + controller_len = strlen(filename); + controller = alloca(controller_len + 1); + (void)strlcpy(controller, filename, controller_len + 1); + + p = strchr(controller, '.'); + if (p) + *p = '\0'; + + const char *ori_path = ops->get_cgroup(ops, controller); + if (ori_path == NULL) { + ERROR("Failed to get cgroup path:%s", controller); + return -1; + } + path = safe_strdup(ori_path); + + h = get_hierarchy(ops, controller); + if (h) { + char *fullpath; + + fullpath = build_full_cgpath_from_monitorpath(h, path, filename); + ret = lxc_write_to_file(fullpath, value, 
strlen(value), false, 0666); + free(fullpath); + } + free(path); + + return ret; +} +#else /* Called externally (i.e. from 'lxc-cgroup') to set new cgroup limits. Here we * don't have a cgroup_data set up, so we ask the running container through the * commands API for the cgroup path. @@ -2668,7 +3240,7 @@ __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops, return 0; } - path = lxc_cmd_get_limiting_cgroup_path(name, lxcpath, controller); + path = lxc_cmd_get_cgroup_path(name, lxcpath, controller); /* not running */ if (!path) return -1; @@ -2683,6 +3255,7 @@ __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops, return ret; } +#endif /* take devices cgroup line * /dev/foo rwx @@ -2726,9 +3299,6 @@ static int device_cgroup_rule_parse_devpath(struct device_item *device, return ret_set_errno(-1, EINVAL); } - if (!mode) - return ret_errno(EINVAL); - if (device_cgroup_parse_access(device, mode) < 0) return -1; @@ -2777,11 +3347,12 @@ static int convert_devpath(const char *invalue, char *dest) return 0; } +#ifndef HAVE_ISULAD /* Called from setup_limits - here we have the container's cgroup_data because * we created the cgroups. */ static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename, - const char *value, bool is_cpuset) + const char *value) { __do_free char *controller = NULL; char *p; @@ -2807,12 +3378,116 @@ static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename, if (!h) return log_error_errno(-ENOENT, ENOENT, "Failed to setup limits for the \"%s\" controller. The controller seems to be unused by \"cgfsng\" cgroup driver or not enabled on the cgroup hierarchy", controller); - if (is_cpuset) { - int ret = lxc_write_openat(h->container_full_path, filename, value, strlen(value)); - if (ret) + return lxc_write_openat(h->container_full_path, filename, value, strlen(value)); +} +#endif + +#ifdef HAVE_ISULAD +/* Called from setup_limits - here we have the container's cgroup_data because + * we created the cgroups. 
+ */ +static int isulad_cg_legacy_get_data(struct cgroup_ops *ops, const char *filename, + char *value, size_t len) +{ + char *fullpath = NULL; + char *p = NULL; + struct hierarchy *h = NULL; + int ret = 0; + char *controller = NULL; + + len = strlen(filename); + if (SIZE_MAX - 1 < len) { + errno = EINVAL; + return -1; + } + controller = calloc(1, len + 1); + if (controller == NULL) { + errno = ENOMEM; + return -1; + } + (void)strlcpy(controller, filename, len + 1); + + p = strchr(controller, '.'); + if (p) + *p = '\0'; + + + h = get_hierarchy(ops, controller); + if (!h) { + ERROR("Failed to setup limits for the \"%s\" controller. " + "The controller seems to be unused by \"cgfsng\" cgroup " + "driver or not enabled on the cgroup hierarchy", + controller); + errno = ENOENT; + free(controller); + return -ENOENT; + } + + fullpath = must_make_path(h->container_full_path, filename, NULL); + ret = lxc_read_from_file(fullpath, value, len); + free(fullpath); + free(controller); + return ret; +} + +static int isulad_cg_legacy_set_data(struct cgroup_ops *ops, const char *filename, + const char *value) +{ + size_t len; + char *fullpath, *p; + /* "b|c <2^64-1>:<2^64-1> r|w|m" = 47 chars max */ + char converted_value[50]; + struct hierarchy *h; + int ret = 0; + char *controller = NULL; + int retry_count = 0; + int max_retry = 10; + char *container_cgroup = ops->container_cgroup; + + len = strlen(filename); + controller = alloca(len + 1); + (void)strlcpy(controller, filename, len + 1); + + p = strchr(controller, '.'); + if (p) + *p = '\0'; + + if (strcmp("devices.allow", filename) == 0 && value[0] == '/') { + ret = convert_devpath(value, converted_value); + if (ret < 0) return ret; + value = converted_value; } - return lxc_write_openat(h->container_limit_path, filename, value, strlen(value)); + + h = get_hierarchy(ops, controller); + if (!h) { + ERROR("Failed to setup limits for the \"%s\" controller. 
" + "The controller seems to be unused by \"cgfsng\" cgroup " + "driver or not enabled on the cgroup hierarchy", + controller); + errno = ENOENT; + return -ENOENT; + } + + fullpath = must_make_path(h->container_full_path, filename, NULL); + +retry: + ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666); + if (ret != 0) { + if (retry_count < max_retry) { + SYSERROR("setting cgroup config for ready process caused \"failed to write %s to %s\".", value, fullpath); + (void)isulad_cg_legacy_handle_cpuset_hierarchy(h, container_cgroup); + (void)isulad_mkdir_eexist_on_last(h->container_full_path, 0755); + usleep(100 * 1000); /* 100 millisecond */ + retry_count++; + goto retry; + } + lxc_write_error_message(ops->errfd, + "%s:%d: setting cgroup config for ready process caused \"failed to write %s to %s: %s\".", + __FILE__, __LINE__, value, fullpath, strerror(errno)); + } + free(fullpath); + return ret; } __cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops, @@ -2824,6 +3499,8 @@ __cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops, struct lxc_list *iterator, *next; struct lxc_cgroup *cg; bool ret = false; + char value[21 + 1] = { 0 }; + long long int readvalue, setvalue; if (!ops) return ret_set_errno(false, ENOENT); @@ -2838,8 +3515,99 @@ __cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops, if (!ops->hierarchies) return ret_set_errno(false, EINVAL); - if (pure_unified_layout(ops)) - return log_warn_errno(true, EINVAL, "Ignoring legacy cgroup limits on pure cgroup2 system"); + sorted_cgroup_settings = sort_cgroup_settings(cgroup_settings); + if (!sorted_cgroup_settings) + return false; + + lxc_list_for_each(iterator, sorted_cgroup_settings) { + cg = iterator->elem; + + if (do_devices == !strncmp("devices", cg->subsystem, 7)) { + const char *cgvalue = cg->value; + if (strcmp(cg->subsystem, "files.limit") == 0) { + if (lxc_safe_long_long(cgvalue, &setvalue) != 0) { + SYSERROR("Invalid integer 
value %s", cgvalue); + goto out; + } + if (setvalue <= 0) { + cgvalue = "max"; + } + } + if (isulad_cg_legacy_set_data(ops, cg->subsystem, cgvalue)) { + if (do_devices && (errno == EACCES || errno == EPERM)) { + SYSWARN("Failed to set \"%s\" to \"%s\"", cg->subsystem, cgvalue); + continue; + } + SYSERROR("Failed to set \"%s\" to \"%s\"", cg->subsystem, cgvalue); + goto out; + } + DEBUG("Set controller \"%s\" set to \"%s\"", cg->subsystem, cgvalue); + } + + // isulad: check cpu shares + if (strcmp(cg->subsystem, "cpu.shares") == 0) { + if (isulad_cg_legacy_get_data(ops, cg->subsystem, value, sizeof(value) - 1) < 0) { + SYSERROR("Error get %s", cg->subsystem); + goto out; + } + trim(value); + if (lxc_safe_long_long(cg->value, &setvalue) != 0) { + SYSERROR("Invalid value %s", cg->value); + goto out; + } + if (lxc_safe_long_long(value, &readvalue) != 0) { + SYSERROR("Invalid value %s", value); + goto out; + } + if (setvalue > readvalue) { + ERROR("The maximum allowed cpu-shares is %s", value); + lxc_write_error_message(ops->errfd, + "%s:%d: setting cgroup config for ready process caused \"The maximum allowed cpu-shares is %s\".", + __FILE__, __LINE__, value); + goto out; + } else if (setvalue < readvalue) { + ERROR("The minimum allowed cpu-shares is %s", value); + lxc_write_error_message(ops->errfd, + "%s:%d: setting cgroup config for ready process caused \"The minimum allowed cpu-shares is %s\".", + __FILE__, __LINE__, value); + goto out; + } + } + } + + ret = true; + INFO("Limits for the legacy cgroup hierarchies have been setup"); +out: + lxc_list_for_each_safe(iterator, sorted_cgroup_settings, next) { + lxc_list_del(iterator); + free(iterator); + } + + return ret; +} +#else +__cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops, + struct lxc_conf *conf, + bool do_devices) +{ + __do_free struct lxc_list *sorted_cgroup_settings = NULL; + struct lxc_list *cgroup_settings = &conf->cgroup; + struct lxc_list *iterator, *next; + struct lxc_cgroup *cg; 
+ bool ret = false; + + if (!ops) + return ret_set_errno(false, ENOENT); + + if (!conf) + return ret_set_errno(false, EINVAL); + + cgroup_settings = &conf->cgroup; + if (lxc_list_empty(cgroup_settings)) + return true; + + if (!ops->hierarchies) + return ret_set_errno(false, EINVAL); sorted_cgroup_settings = sort_cgroup_settings(cgroup_settings); if (!sorted_cgroup_settings) @@ -2849,7 +3617,7 @@ __cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops, cg = iterator->elem; if (do_devices == !strncmp("devices", cg->subsystem, 7)) { - if (cg_legacy_set_data(ops, cg->subsystem, cg->value, strncmp("cpuset", cg->subsystem, 6) == 0)) { + if (cg_legacy_set_data(ops, cg->subsystem, cg->value)) { if (do_devices && (errno == EACCES || errno == EPERM)) { SYSWARN("Failed to set \"%s\" to \"%s\"", cg->subsystem, cg->value); continue; @@ -2871,6 +3639,7 @@ out: return ret; } +#endif /* * Some of the parsing logic comes from the original cgroup device v1 @@ -2918,12 +3687,9 @@ __cgfsng_ops static bool cgfsng_setup_limits(struct cgroup_ops *ops, return ret_set_errno(false, EINVAL); conf = handler->conf; - cgroup_settings = &conf->cgroup2; - if (lxc_list_empty(cgroup_settings)) + if (lxc_list_empty(&conf->cgroup2)) return true; - - if (!pure_unified_layout(ops)) - return log_warn_errno(true, EINVAL, "Ignoring cgroup2 limits on legacy cgroup system"); + cgroup_settings = &conf->cgroup2; if (!ops->unified) return false; @@ -2937,7 +3703,7 @@ __cgfsng_ops static bool cgfsng_setup_limits(struct cgroup_ops *ops, ret = bpf_device_cgroup_prepare(ops, conf, cg->subsystem, cg->value); } else { - ret = lxc_write_openat(h->container_limit_path, + ret = lxc_write_openat(h->container_full_path, cg->subsystem, cg->value, strlen(cg->value)); if (ret < 0) @@ -3013,7 +3779,7 @@ __cgfsng_ops bool cgfsng_devices_activate(struct cgroup_ops *ops, return log_error_errno(false, ENOMEM, "Failed to finalize bpf program"); ret = bpf_program_cgroup_attach(devices, BPF_CGROUP_DEVICE, - 
unified->container_limit_path, + unified->container_full_path, BPF_F_ALLOW_MULTI); if (ret) return log_error_errno(false, ENOMEM, "Failed to attach bpf program"); @@ -3085,6 +3851,12 @@ bool __cgfsng_delegate_controllers(struct cgroup_ops *ops, const char *cgroup) return true; } +#ifdef HAVE_ISULAD +__cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops) +{ + return true; +} +#else __cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops) { if (!ops) @@ -3092,6 +3864,7 @@ __cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops) return __cgfsng_delegate_controllers(ops, ops->monitor_cgroup); } +#endif __cgfsng_ops bool cgfsng_payload_delegate_controllers(struct cgroup_ops *ops) { @@ -3144,7 +3917,7 @@ static void cg_unified_delegate(char ***delegate) return; } - lxc_iterate_parts(token, buf, " \t\n") { + lxc_iterate_parts (token, buf, " \t\n") { /* * We always need to chown this for both cgroup and * cgroup2. @@ -3192,7 +3965,6 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg __do_free char *base_cgroup = NULL, *mountpoint = NULL; __do_free_string_list char **controller_list = NULL; int type; - bool writeable; struct hierarchy *new; type = get_cgroup_version(line); @@ -3242,6 +4014,23 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg trim(base_cgroup); prune_init_scope(base_cgroup); +#ifdef HAVE_ISULAD + /* isulad: do not test writeable, if we run isulad in docker without cgroup namespace. + * the base_cgroup will be docker/XXX.., mountpoint+base_cgroup may be not exist */ + + /* + * reason:base cgroup may be started with /system.slice when cg_hybrid_init + * read /proc/1/cgroup on host, and cgroup init will set all containers + * cgroup path under /sys/fs/cgroup//system.slice/xxx/lxc + * directory, this is not consistent with docker. The default cgroup path + * should be under /sys/fs/cgroup//lxc directory. 
+ */ + + if (strlen(base_cgroup) > 1 && base_cgroup[0] == '/') { + base_cgroup[1] = '\0'; + } +#else + bool writeable; if (type == CGROUP2_SUPER_MAGIC) writeable = test_writeable_v2(mountpoint, base_cgroup); else @@ -3250,7 +4039,7 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg TRACE("The %s group is not writeable", base_cgroup); continue; } - +#endif if (type == CGROUP2_SUPER_MAGIC) { char *cgv2_ctrl_path; @@ -3403,7 +4192,45 @@ static int cg_init(struct cgroup_ops *ops, struct lxc_conf *conf) return cg_hybrid_init(ops, relative, !lxc_list_empty(&conf->id_map)); } -__cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops) +#ifdef HAVE_ISULAD +__cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops, struct lxc_conf *conf) +{ + const char *cgroup_pattern; + const char *cgroup_tree; + __do_free char *container_cgroup = NULL, *__cgroup_tree = NULL; + size_t len; + + if (!ops) + return ret_set_errno(-1, ENOENT); + + /* copy system-wide cgroup information */ + cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern"); + if (cgroup_pattern && strcmp(cgroup_pattern, "") != 0) + ops->cgroup_pattern = must_copy_string(cgroup_pattern); + + if (conf->cgroup_meta.dir) { + cgroup_tree = conf->cgroup_meta.dir; + container_cgroup = must_concat(&len, cgroup_tree, "/", conf->name, NULL); + } else if (ops->cgroup_pattern) { + __cgroup_tree = lxc_string_replace("%n", conf->name, ops->cgroup_pattern); + if (!__cgroup_tree) + return ret_set_errno(-1, ENOMEM); + + cgroup_tree = __cgroup_tree; + container_cgroup = must_concat(&len, cgroup_tree, NULL); + } else { + cgroup_tree = NULL; + container_cgroup = must_concat(&len, conf->name, NULL); + } + if (!container_cgroup) + return ret_set_errno(-1, ENOMEM); + + ops->container_cgroup = move_ptr(container_cgroup); + + return 0; +} +#else +__cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops, struct lxc_conf *conf) { const char *cgroup_pattern; @@ -3417,6 +4244,7 @@ 
__cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops) return 0; } +#endif struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf) { @@ -3433,7 +4261,12 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf) return NULL; cgfsng_ops->data_init = cgfsng_data_init; +#ifdef HAVE_ISULAD + cgfsng_ops->errfd = conf ? conf->errpipe[1] : -1; + cgfsng_ops->payload_destroy = isulad_cgfsng_payload_destroy; +#else cgfsng_ops->payload_destroy = cgfsng_payload_destroy; +#endif cgfsng_ops->monitor_destroy = cgfsng_monitor_destroy; cgfsng_ops->monitor_create = cgfsng_monitor_create; cgfsng_ops->monitor_enter = cgfsng_monitor_enter; @@ -3446,6 +4279,9 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf) cgfsng_ops->num_hierarchies = cgfsng_num_hierarchies; cgfsng_ops->get_hierarchies = cgfsng_get_hierarchies; cgfsng_ops->get_cgroup = cgfsng_get_cgroup; +#ifdef HAVE_ISULAD + cgfsng_ops->get_cgroup_full_path = cgfsng_get_cgroup_full_path; +#endif cgfsng_ops->get = cgfsng_get; cgfsng_ops->set = cgfsng_set; cgfsng_ops->freeze = cgfsng_freeze; @@ -3458,7 +4294,6 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf) cgfsng_ops->chown = cgfsng_chown; cgfsng_ops->mount = cgfsng_mount; cgfsng_ops->devices_activate = cgfsng_devices_activate; - cgfsng_ops->get_limiting_cgroup = cgfsng_get_limiting_cgroup; return move_ptr(cgfsng_ops); } diff --git a/src/lxc/cgroups/cgroup.c b/src/lxc/cgroups/cgroup.c index 7c94fd83b..ad46d5c99 100644 --- a/src/lxc/cgroups/cgroup.c +++ b/src/lxc/cgroups/cgroup.c @@ -31,7 +31,7 @@ struct cgroup_ops *cgroup_init(struct lxc_conf *conf) if (!cgroup_ops) return log_error_errno(NULL, errno, "Failed to initialize cgroup driver"); - if (cgroup_ops->data_init(cgroup_ops)) { + if (cgroup_ops->data_init(cgroup_ops, conf)) { cgroup_exit(cgroup_ops); return log_error_errno(NULL, errno, "Failed to initialize cgroup data"); diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h index c5bf7941a..a9048c44a 100644 --- 
a/src/lxc/cgroups/cgroup.h +++ b/src/lxc/cgroups/cgroup.h @@ -54,11 +54,7 @@ typedef enum { * init's cgroup (if root). * * @container_full_path - * - The full path to the container's cgroup. - * - * @container_limit_path - * - The full path to the container's limiting cgroup. May simply point to - * container_full_path. + * - The full path to the containers cgroup. * * @monitor_full_path * - The full path to the monitor's cgroup. @@ -81,18 +77,15 @@ struct hierarchy { char *mountpoint; char *container_base_path; char *container_full_path; - char *container_limit_path; char *monitor_full_path; int version; /* cgroup2 only */ unsigned int bpf_device_controller:1; - /* container cgroup fd */ - int cgfd_con; - /* limiting cgroup fd (may be equal to cgfd_con if not separated) */ - int cgfd_limit; /* monitor cgroup fd */ + int cgfd_con; + /* container cgroup fd */ int cgfd_mon; }; @@ -109,6 +102,10 @@ struct cgroup_ops { char *container_cgroup; char *monitor_cgroup; +#ifdef HAVE_ISULAD + int errfd; +#endif + /* @hierarchies * - A NULL-terminated array of struct hierarchy, one per legacy * hierarchy. No duplicates. 
First sufficient, writeable mounted @@ -146,14 +143,21 @@ struct cgroup_ops { */ cgroup_layout_t cgroup_layout; - int (*data_init)(struct cgroup_ops *ops); + int (*data_init)(struct cgroup_ops *ops, struct lxc_conf *conf); +#ifdef HAVE_ISULAD + bool (*payload_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler); +#else void (*payload_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler); +#endif void (*monitor_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler); bool (*monitor_create)(struct cgroup_ops *ops, struct lxc_handler *handler); bool (*monitor_enter)(struct cgroup_ops *ops, struct lxc_handler *handler); bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler *handler); bool (*payload_enter)(struct cgroup_ops *ops, struct lxc_handler *handler); const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller); +#ifdef HAVE_ISULAD + const char *(*get_cgroup_full_path)(struct cgroup_ops *ops, const char *controller); +#endif bool (*escape)(const struct cgroup_ops *ops, struct lxc_conf *conf); int (*num_hierarchies)(struct cgroup_ops *ops); bool (*get_hierarchies)(struct cgroup_ops *ops, int n, char ***out); @@ -176,7 +180,6 @@ struct cgroup_ops { bool (*monitor_delegate_controllers)(struct cgroup_ops *ops); bool (*payload_delegate_controllers)(struct cgroup_ops *ops); void (*payload_finalize)(struct cgroup_ops *ops); - const char *(*get_limiting_cgroup)(struct cgroup_ops *ops, const char *controller); }; extern struct cgroup_ops *cgroup_init(struct lxc_conf *conf); diff --git a/src/lxc/cgroups/cgroup2_devices.c b/src/lxc/cgroups/cgroup2_devices.c index 04ba7b332..4efb28fbd 100644 --- a/src/lxc/cgroups/cgroup2_devices.c +++ b/src/lxc/cgroups/cgroup2_devices.c @@ -167,7 +167,7 @@ struct bpf_program *bpf_program_new(uint32_t prog_type) { __do_free struct bpf_program *prog = NULL; - prog = zalloc(sizeof(struct bpf_program)); + prog = calloc(1, sizeof(struct bpf_program)); if (!prog) return NULL; @@ -183,6 +183,9 @@ 
struct bpf_program *bpf_program_new(uint32_t prog_type) int bpf_program_init(struct bpf_program *prog) { + if (!prog) + return ret_set_errno(-1, EINVAL); + const struct bpf_insn pre_insn[] = { /* load device type to r2 */ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct bpf_cgroup_dev_ctx, access_type)), @@ -199,17 +202,19 @@ int bpf_program_init(struct bpf_program *prog) BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, offsetof(struct bpf_cgroup_dev_ctx, minor)), }; - if (!prog) - return ret_set_errno(-1, EINVAL); - return bpf_program_add_instructions(prog, pre_insn, ARRAY_SIZE(pre_insn)); } int bpf_program_append_device(struct bpf_program *prog, struct device_item *device) { + int ret; int jump_nr = 1; - int access_mask, device_type, ret; - struct bpf_insn bpf_access_decision[2]; + struct bpf_insn bpf_access_decision[] = { + BPF_MOV64_IMM(BPF_REG_0, device->allow), + BPF_EXIT_INSN(), + }; + int access_mask; + int device_type; if (!prog || !device) return ret_set_errno(-1, EINVAL); @@ -280,8 +285,6 @@ int bpf_program_append_device(struct bpf_program *prog, struct device_item *devi return log_error_errno(-1, errno, "Failed to add instructions to bpf cgroup program"); } - bpf_access_decision[0] = BPF_MOV64_IMM(BPF_REG_0, device->allow); - bpf_access_decision[1] = BPF_EXIT_INSN(); ret = bpf_program_add_instructions(prog, bpf_access_decision, ARRAY_SIZE(bpf_access_decision)); if (ret) @@ -292,7 +295,10 @@ int bpf_program_append_device(struct bpf_program *prog, struct device_item *devi int bpf_program_finalize(struct bpf_program *prog) { - struct bpf_insn ins[2]; + struct bpf_insn ins[] = { + BPF_MOV64_IMM(BPF_REG_0, prog->device_list_type), + BPF_EXIT_INSN(), + }; if (!prog) return ret_set_errno(-1, EINVAL); @@ -301,9 +307,6 @@ int bpf_program_finalize(struct bpf_program *prog) prog->device_list_type == LXC_BPF_DEVICE_CGROUP_BLACKLIST ? 
"blacklist" : "whitelist"); - - ins[0] = BPF_MOV64_IMM(BPF_REG_0, prog->device_list_type); - ins[1] = BPF_EXIT_INSN(); return bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins)); } @@ -337,12 +340,12 @@ static int bpf_program_load_kernel(struct bpf_program *prog, char *log_buf, int bpf_program_cgroup_attach(struct bpf_program *prog, int type, const char *path, uint32_t flags) { - __do_close int fd = -EBADF; __do_free char *copy = NULL; + __do_close int fd = -EBADF; union bpf_attr attr; int ret; - if (!path || !prog) + if (!prog) return ret_set_errno(-1, EINVAL); if (flags & ~(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)) @@ -392,8 +395,8 @@ int bpf_program_cgroup_attach(struct bpf_program *prog, int type, int bpf_program_cgroup_detach(struct bpf_program *prog) { - __do_close int fd = -EBADF; int ret; + __do_close int fd = -EBADF; if (!prog) return 0; @@ -441,9 +444,6 @@ int bpf_list_add_device(struct lxc_conf *conf, struct device_item *device) __do_free struct device_item *new_device = NULL; struct lxc_list *it; - if (!conf || !device) - return ret_errno(EINVAL); - lxc_list_for_each(it, &conf->devices) { struct device_item *cur = it->elem; @@ -502,11 +502,12 @@ int bpf_list_add_device(struct lxc_conf *conf, struct device_item *device) bool bpf_devices_cgroup_supported(void) { - __do_bpf_program_free struct bpf_program *prog = NULL; const struct bpf_insn dummy[] = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }; + + __do_bpf_program_free struct bpf_program *prog = NULL; int ret; if (geteuid() != 0) @@ -514,7 +515,7 @@ bool bpf_devices_cgroup_supported(void) "The bpf device cgroup requires real root"); prog = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE); - if (!prog) + if (prog < 0) return log_trace(false, "Failed to allocate new bpf device cgroup program"); ret = bpf_program_add_instructions(prog, dummy, ARRAY_SIZE(dummy)); diff --git a/src/lxc/cmd/lxc-update-config.in b/src/lxc/cmd/lxc-update-config.in index 0a03f06d0..95187d405 100644 --- 
a/src/lxc/cmd/lxc-update-config.in +++ b/src/lxc/cmd/lxc-update-config.in @@ -74,7 +74,7 @@ sed -i \ -e 's/\([[:blank:]*]\|#*\)\(lxc\.stopsignal\)\([[:blank:]*]\|=\)/\1lxc\.signal\.stop\3/g' \ -e 's/\([[:blank:]*]\|#*\)\(lxc\.syslog\)\([[:blank:]*]\|=\)/\1lxc\.log\.syslog\3/g' \ -e 's/\([[:blank:]*]\|#*\)\(lxc\.loglevel\)\([[:blank:]*]\|=\)/\1lxc\.log\.level\3/g' \ --e 's/\([[:blank:]*]\|#*\)\(lxc\.logfile\)\([[:blank:]*]\|=\)/\1lxc\.log\.file\3/g' \ +-e 's/\([[:blank:]*]\|#*\)\(lxc\.logfile\)\([[:blank:]*]\|=\)/1lxc\.log\.file\3/g' \ -e 's/\([[:blank:]*]\|#*\)\(lxc\.init_cmd\)\([[:blank:]*]\|=\)/\1lxc\.init\.cmd\3/g' \ -e 's/\([[:blank:]*]\|#*\)\(lxc\.init_uid\)\([[:blank:]*]\|=\)/\1lxc\.init\.uid\3/g' \ -e 's/\([[:blank:]*]\|#*\)\(lxc\.init_gid\)\([[:blank:]*]\|=\)/\1lxc\.init\.gid\3/g' \ diff --git a/src/lxc/cmd/lxc_init.c b/src/lxc/cmd/lxc_init.c index a03631f1a..a52793343 100644 --- a/src/lxc/cmd/lxc_init.c +++ b/src/lxc/cmd/lxc_init.c @@ -28,7 +28,7 @@ #include "initutils.h" #include "memory_utils.h" #include "parse.h" -#include "process_utils.h" +#include "raw_syscalls.h" #include "string_utils.h" /* option keys for long only options */ diff --git a/src/lxc/cmd/lxc_monitord.c b/src/lxc/cmd/lxc_monitord.c index bcb289ca6..3ec7a756d 100644 --- a/src/lxc/cmd/lxc_monitord.c +++ b/src/lxc/cmd/lxc_monitord.c @@ -28,7 +28,7 @@ #include "log.h" #include "mainloop.h" #include "monitor.h" -#include "process_utils.h" +#include "raw_syscalls.h" #include "utils.h" #define CLIENTFDS_CHUNK 64 diff --git a/src/lxc/cmd/lxc_user_nic.c b/src/lxc/cmd/lxc_user_nic.c index 4160565f3..fd3455903 100644 --- a/src/lxc/cmd/lxc_user_nic.c +++ b/src/lxc/cmd/lxc_user_nic.c @@ -36,7 +36,7 @@ #include "memory_utils.h" #include "network.h" #include "parse.h" -#include "process_utils.h" +#include "raw_syscalls.h" #include "string_utils.h" #include "syscall_wrappers.h" #include "utils.h" @@ -133,14 +133,26 @@ static char *get_username(void) return strdup(pwent.pw_name); } +static void 
free_groupnames(char **groupnames) +{ + int i; + + if (!groupnames) + return; + + for (i = 0; groupnames[i]; i++) + free(groupnames[i]); + + free(groupnames); +} static char **get_groupnames(void) { __do_free char *buf = NULL; __do_free gid_t *group_ids = NULL; - __do_free_string_list char **groupnames = NULL; int ngroups; int ret, i; + char **groupnames; struct group grent; struct group *grentp = NULL; size_t bufsize; @@ -149,10 +161,9 @@ static char **get_groupnames(void) if (ngroups < 0) { CMD_SYSERROR("Failed to get number of groups the user belongs to\n"); return NULL; - } - - if (ngroups == 0) + } else if (ngroups == 0) { return NULL; + } group_ids = malloc(sizeof(gid_t) * ngroups); if (!group_ids) { @@ -166,53 +177,62 @@ static char **get_groupnames(void) return NULL; } - groupnames = zalloc(sizeof(char *) * (ngroups + 1)); + groupnames = malloc(sizeof(char *) * (ngroups + 1)); if (!groupnames) { CMD_SYSERROR("Failed to allocate memory while getting group names\n"); return NULL; } + memset(groupnames, 0, sizeof(char *) * (ngroups + 1)); + bufsize = sysconf(_SC_GETGR_R_SIZE_MAX); if (bufsize == -1) bufsize = 1024; buf = malloc(bufsize); if (!buf) { + free_groupnames(groupnames); CMD_SYSERROR("Failed to allocate memory while getting group names\n"); return NULL; } for (i = 0; i < ngroups; i++) { while ((ret = getgrgid_r(group_ids[i], &grent, buf, bufsize, &grentp)) == ERANGE) { - char *new_buf; - bufsize <<= 1; if (bufsize > MAX_GRBUF_SIZE) { - usernic_error("Failed to get group members: %u\n", group_ids[i]); + usernic_error("Failed to get group members: %u\n", + group_ids[i]); + free_groupnames(groupnames); return NULL; } - - new_buf = realloc(buf, bufsize); + char *new_buf = realloc(buf, bufsize); if (!new_buf) { - usernic_error("Failed to allocate memory while getting group names: %s\n", + usernic_error("Failed to allocate memory while getting group " + "names: %s\n", strerror(errno)); +
free_groupnames(groupnames); return NULL; } buf = new_buf; } + if (!grentp) { + if (ret == 0) + usernic_error("%s", "Could not find matched group record\n"); - /* If a group is not found, just ignore it. */ - if (!grentp) - continue; + CMD_SYSERROR("Failed to get group name: %u\n", group_ids[i]); + free_groupnames(groupnames); + return NULL; + } groupnames[i] = strdup(grent.gr_name); if (!groupnames[i]) { usernic_error("Failed to copy group name \"%s\"", grent.gr_name); + free_groupnames(groupnames); return NULL; } } - return move_ptr(groupnames); + return groupnames; } static bool name_is_in_groupnames(char *name, char **groupnames) @@ -301,9 +321,9 @@ static int get_alloted(char *me, char *intype, char *link, { __do_free char *line = NULL; __do_fclose FILE *fin = NULL; - __do_free_string_list char **groups = NULL; int n, ret; char name[100], type[100], br[100]; + char **groups; int count = 0; size_t len = 0; @@ -355,6 +375,8 @@ static int get_alloted(char *me, char *intype, char *link, count += n; } + free_groupnames(groups); + /* Now return the total number of nics that this user can create.
*/ return count; } diff --git a/src/lxc/cmd/lxc_usernsexec.c b/src/lxc/cmd/lxc_usernsexec.c index aee7448ce..6441fb3c8 100644 --- a/src/lxc/cmd/lxc_usernsexec.c +++ b/src/lxc/cmd/lxc_usernsexec.c @@ -61,7 +61,7 @@ static void opentty(const char *tty, int which) fd = open(tty, O_RDWR | O_NONBLOCK); if (fd < 0) { - CMD_SYSINFO("Failed to open tty"); + CMD_SYSERROR("Failed to open tty"); return; } @@ -87,13 +87,13 @@ static int do_child(void *vargv) int ret; char **argv = (char **)vargv; - if (!lxc_setgroups(0, NULL)) - return -1; - /* Assume we want to become root */ if (!lxc_switch_uid_gid(0, 0)) return -1; + if (!lxc_setgroups(0, NULL)) + return -1; + ret = unshare(CLONE_NEWNS); if (ret < 0) { CMD_SYSERROR("Failed to unshare mount namespace"); @@ -103,7 +103,7 @@ static int do_child(void *vargv) if (detect_shared_rootfs()) { ret = mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL); if (ret < 0) { - CMD_SYSINFO("Failed to recursively turn root mount tree into dependent mount"); + CMD_SYSINFO("Failed to make \"/\" rslave"); return -1; } } diff --git a/src/lxc/commands.c b/src/lxc/commands.c index b6ae101fc..37354e87c 100644 --- a/src/lxc/commands.c +++ b/src/lxc/commands.c @@ -75,8 +75,8 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd) [LXC_CMD_GET_CONFIG_ITEM] = "get_config_item", [LXC_CMD_GET_NAME] = "get_name", [LXC_CMD_GET_LXCPATH] = "get_lxcpath", - [LXC_CMD_ADD_STATE_CLIENT] = "add_state_client", - [LXC_CMD_CONSOLE_LOG] = "console_log", + [LXC_CMD_ADD_STATE_CLIENT] = "add_state_client", + [LXC_CMD_CONSOLE_LOG] = "console_log", [LXC_CMD_SERVE_STATE_CLIENTS] = "serve_state_clients", [LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER] = "seccomp_notify_add_listener", [LXC_CMD_ADD_BPF_DEVICE_CGROUP] = "add_bpf_device_cgroup", @@ -84,8 +84,10 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd) [LXC_CMD_UNFREEZE] = "unfreeze", [LXC_CMD_GET_CGROUP2_FD] = "get_cgroup2_fd", [LXC_CMD_GET_INIT_PIDFD] = "get_init_pidfd", - [LXC_CMD_GET_LIMITING_CGROUP] = "get_limiting_cgroup", - 
[LXC_CMD_GET_LIMITING_CGROUP2_FD] = "get_limiting_cgroup2_fd", +#ifdef HAVE_ISULAD + [LXC_CMD_SET_TERMINAL_FIFOS] = "set_terminal_fifos", + [LXC_CMD_SET_TERMINAL_WINCH] = "set_terminal_winch", +#endif }; if (cmd >= LXC_CMD_MAX) @@ -108,7 +110,7 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd) * stored directly in data and datalen will be 0. * * As a special case, the response for LXC_CMD_CONSOLE is created - * here as it contains an fd for the ptmx pty passed through the + * here as it contains an fd for the master pty passed through the * unix socket. */ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd) @@ -117,7 +119,15 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd) int ret; struct lxc_cmd_rsp *rsp = &cmd->rsp; +#ifdef HAVE_ISULAD + /*isulad: add timeout 1s to avoid long block due to [lxc monitor] error*/ + ret = lxc_abstract_unix_recv_fds_timeout(sock, &fd_rsp, 1, rsp, sizeof(*rsp), 1000 * 1000); + if (ret < 0 && (errno == ECONNRESET || errno == EAGAIN || errno == EWOULDBLOCK)) { + errno = ECONNRESET; /*isulad set errno ECONNRESET when timeout */ + } +#else ret = lxc_abstract_unix_recv_fds(sock, &fd_rsp, 1, rsp, sizeof(*rsp)); +#endif if (ret < 0) return log_warn_errno(-1, errno, "Failed to receive response for command \"%s\"", @@ -139,14 +149,12 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd) ENOMEM, "Failed to receive response for command \"%s\"", lxc_cmd_str(cmd->req.cmd)); - rspdata->ptmxfd = move_fd(fd_rsp); + rspdata->masterfd = move_fd(fd_rsp); rspdata->ttynum = PTR_TO_INT(rsp->data); rsp->data = rspdata; } - if (cmd->req.cmd == LXC_CMD_GET_CGROUP2_FD || - cmd->req.cmd == LXC_CMD_GET_LIMITING_CGROUP2_FD) - { + if (cmd->req.cmd == LXC_CMD_GET_CGROUP2_FD) { int cgroup2_fd = move_fd(fd_rsp); rsp->data = INT_TO_PTR(cgroup2_fd); } @@ -487,14 +495,25 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req, return 0; } -static char *lxc_cmd_get_cgroup_path_do(const char *name, const char 
*lxcpath, - const char *subsystem, - lxc_cmd_t command) +/* + * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a + * particular subsystem. This is the cgroup path relative to the root + * of the cgroup filesystem. + * + * @name : name of container to connect to + * @lxcpath : the lxcpath in which the container is running + * @subsystem : the subsystem being asked about + * + * Returns the path on success, NULL on failure. The caller must free() the + * returned path. + */ +char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, + const char *subsystem) { int ret, stopped; struct lxc_cmd_rr cmd = { .req = { - .cmd = command, + .cmd = LXC_CMD_GET_CGROUP, .data = subsystem, .datalen = 0, }, @@ -509,21 +528,8 @@ static char *lxc_cmd_get_cgroup_path_do(const char *name, const char *lxcpath, if (ret < 0) return NULL; - if (ret == 0) { - if (command == LXC_CMD_GET_LIMITING_CGROUP) { - /* - * This may indicate that the container was started - * under an ealier version before - * `cgroup_advanced_isolation` as implemented, there - * it sees an unknown command and just closes the - * socket, sending us an EOF. - */ - return lxc_cmd_get_cgroup_path_do(name, lxcpath, - subsystem, - LXC_CMD_GET_CGROUP); - } + if (ret == 0) return NULL; - } if (cmd.rsp.ret < 0 || cmd.rsp.datalen < 0) return NULL; @@ -531,72 +537,24 @@ static char *lxc_cmd_get_cgroup_path_do(const char *name, const char *lxcpath, return cmd.rsp.data; } -/* - * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a - * particular subsystem. This is the cgroup path relative to the root - * of the cgroup filesystem. - * - * @name : name of container to connect to - * @lxcpath : the lxcpath in which the container is running - * @subsystem : the subsystem being asked about - * - * Returns the path on success, NULL on failure. The caller must free() the - * returned path. 
- */ -char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, - const char *subsystem) -{ - return lxc_cmd_get_cgroup_path_do(name, lxcpath, subsystem, - LXC_CMD_GET_CGROUP); -} - -/* - * lxc_cmd_get_limiting_cgroup_path: Calculate a container's limiting cgroup - * path for a particular subsystem. This is the cgroup path relative to the - * root of the cgroup filesystem. This may be the same as the path returned by - * lxc_cmd_get_cgroup_path if the container doesn't have a limiting path prefix - * set. - * - * @name : name of container to connect to - * @lxcpath : the lxcpath in which the container is running - * @subsystem : the subsystem being asked about - * - * Returns the path on success, NULL on failure. The caller must free() the - * returned path. - */ -char *lxc_cmd_get_limiting_cgroup_path(const char *name, const char *lxcpath, - const char *subsystem) -{ - return lxc_cmd_get_cgroup_path_do(name, lxcpath, subsystem, - LXC_CMD_GET_LIMITING_CGROUP); -} - -static int lxc_cmd_get_cgroup_callback_do(int fd, struct lxc_cmd_req *req, - struct lxc_handler *handler, - struct lxc_epoll_descr *descr, - bool limiting_cgroup) +static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req, + struct lxc_handler *handler, + struct lxc_epoll_descr *descr) { int ret; const char *path; - const void *reqdata; struct lxc_cmd_rsp rsp; struct cgroup_ops *cgroup_ops = handler->cgroup_ops; - const char *(*get_fn)(struct cgroup_ops *ops, const char *controller); if (req->datalen > 0) { ret = validate_string_request(fd, req); if (ret != 0) return ret; - reqdata = req->data; + + path = cgroup_ops->get_cgroup(cgroup_ops, req->data); } else { - reqdata = NULL; + path = cgroup_ops->get_cgroup(cgroup_ops, NULL); } - - get_fn = (limiting_cgroup ? 
cgroup_ops->get_cgroup - : cgroup_ops->get_limiting_cgroup); - - path = get_fn(cgroup_ops, reqdata); - if (!path) return -1; @@ -611,20 +569,6 @@ static int lxc_cmd_get_cgroup_callback_do(int fd, struct lxc_cmd_req *req, return 0; } -static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req, - struct lxc_handler *handler, - struct lxc_epoll_descr *descr) -{ - return lxc_cmd_get_cgroup_callback_do(fd, req, handler, descr, false); -} - -static int lxc_cmd_get_limiting_cgroup_callback(int fd, struct lxc_cmd_req *req, - struct lxc_handler *handler, - struct lxc_epoll_descr *descr) -{ - return ret_errno(ENOSYS); -} - /* * lxc_cmd_get_config_item: Get config item the running container * @@ -844,7 +788,7 @@ static int lxc_cmd_terminal_winch_callback(int fd, struct lxc_cmd_req *req, * @name : name of container to connect to * @ttynum : in: the tty to open or -1 for next available * : out: the tty allocated - * @fd : out: file descriptor for ptmx side of pty + * @fd : out: file descriptor for master side of pty * @lxcpath : the lxcpath in which the container is running * * Returns fd holding tty allocated on success, < 0 on failure @@ -871,11 +815,11 @@ int lxc_cmd_console(const char *name, int *ttynum, int *fd, const char *lxcpath) if (ret == 0) return log_error(-1, "tty number %d invalid, busy or all ttys busy", *ttynum); - if (rspdata->ptmxfd < 0) + if (rspdata->masterfd < 0) return log_error(-1, "Unable to allocate fd for tty %d", rspdata->ttynum); ret = cmd.rsp.ret; /* socket fd */ - *fd = rspdata->ptmxfd; + *fd = rspdata->masterfd; *ttynum = rspdata->ttynum; return log_info(ret, "Alloced fd %d for tty %d via socket %d", *fd, rspdata->ttynum, ret); @@ -885,17 +829,17 @@ static int lxc_cmd_console_callback(int fd, struct lxc_cmd_req *req, struct lxc_handler *handler, struct lxc_epoll_descr *descr) { - int ptmxfd, ret; + int masterfd, ret; struct lxc_cmd_rsp rsp; int ttynum = PTR_TO_INT(req->data); - ptmxfd = lxc_terminal_allocate(handler->conf, fd, 
&ttynum); - if (ptmxfd < 0) + masterfd = lxc_terminal_allocate(handler->conf, fd, &ttynum); + if (masterfd < 0) return LXC_CMD_REAP_CLIENT_FD; memset(&rsp, 0, sizeof(rsp)); rsp.data = INT_TO_PTR(ttynum); - ret = lxc_abstract_unix_send_fds(fd, &ptmxfd, 1, &rsp, sizeof(rsp)); + ret = lxc_abstract_unix_send_fds(fd, &masterfd, 1, &rsp, sizeof(rsp)); if (ret < 0) { lxc_terminal_free(handler->conf, fd); return log_error_errno(LXC_CMD_REAP_CLIENT_FD, errno, @@ -1434,47 +1378,146 @@ int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath) return PTR_TO_INT(cmd.rsp.data); } -static int lxc_cmd_get_cgroup2_fd_callback_do(int fd, struct lxc_cmd_req *req, - struct lxc_handler *handler, - struct lxc_epoll_descr *descr, - bool limiting_cgroup) +static int lxc_cmd_get_cgroup2_fd_callback(int fd, struct lxc_cmd_req *req, + struct lxc_handler *handler, + struct lxc_epoll_descr *descr) { struct lxc_cmd_rsp rsp = { .ret = -EINVAL, }; struct cgroup_ops *ops = handler->cgroup_ops; - int ret, send_fd; + int ret; if (!pure_unified_layout(ops) || !ops->unified) return lxc_cmd_rsp_send(fd, &rsp); - send_fd = limiting_cgroup ? ops->unified->cgfd_limit - : ops->unified->cgfd_con; - rsp.ret = 0; - ret = lxc_abstract_unix_send_fds(fd, &send_fd, 1, &rsp, sizeof(rsp)); + ret = lxc_abstract_unix_send_fds(fd, &ops->unified->cgfd_con, 1, &rsp, + sizeof(rsp)); if (ret < 0) return log_error(LXC_CMD_REAP_CLIENT_FD, "Failed to send cgroup2 fd"); return 0; } -static int lxc_cmd_get_cgroup2_fd_callback(int fd, struct lxc_cmd_req *req, - struct lxc_handler *handler, - struct lxc_epoll_descr *descr) +#ifdef HAVE_ISULAD +/* + * isulad: lxc_cmd_set_terminal_fifos: Set the fifos used by the container for terminal input/output + * + * @name, @lxcpath: container to contact; @in_fifo, @out_fifo, @err_fifo: fifo paths (NULL is sent as "none") + * + * Returns 0 on success, -1 on failure.
+ */ +int lxc_cmd_set_terminal_fifos(const char *name, const char *lxcpath, const char *in_fifo, + const char *out_fifo, const char *err_fifo) { - return lxc_cmd_get_cgroup2_fd_callback_do(fd, req, handler, descr, - false); + int ret = 0, stopped = 0; + int len = 0; + char *tmp = NULL; + const char *split = "&&&&", *none_fifo_name = "none"; + const char *cmd_in_fifo = in_fifo ? in_fifo : none_fifo_name; + const char *cmd_out_fifo = out_fifo ? out_fifo : none_fifo_name; + const char *cmd_err_fifo = err_fifo ? err_fifo : none_fifo_name; + + if (len + strlen(cmd_in_fifo) + strlen(split) + strlen(cmd_out_fifo) + + strlen(split) + strlen(cmd_err_fifo) == SIZE_MAX) + return -1; + len += strlen(cmd_in_fifo) + strlen(split) + strlen(cmd_out_fifo) + strlen(split) + strlen(cmd_err_fifo) + 1; + tmp = malloc(len); + if (tmp == NULL) + return -1; + ret = snprintf(tmp, len, "%s%s%s%s%s", cmd_in_fifo, split, cmd_out_fifo, split, cmd_err_fifo); + if (ret < 0 || ret >= len) { + ERROR("Failed to snprintf in fifo of command"); + free(tmp); + return -1; + } + + struct lxc_cmd_rr cmd = { + .req = { + .cmd = LXC_CMD_SET_TERMINAL_FIFOS, + .datalen = strlen(tmp)+1, + .data = tmp, + }, + }; + + ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL); + if (ret < 0) { + ERROR("Failed to send command to container"); + free(tmp); + return -1; + } + + if (cmd.rsp.ret != 0) { + ERROR("Command response error:%d", cmd.rsp.ret); + free(tmp); + return -1; + } + + free(tmp); + return 0; } -static int lxc_cmd_get_limiting_cgroup2_fd_callback(int fd, - struct lxc_cmd_req *req, - struct lxc_handler *handler, - struct lxc_epoll_descr *descr) +static int lxc_cmd_set_terminal_fifos_callback(int fd, struct lxc_cmd_req *req, + struct lxc_handler *handler, struct lxc_epoll_descr *descr) { - return ret_errno(ENOSYS); + struct lxc_cmd_rsp rsp; + memset(&rsp, 0, sizeof(rsp)); + + rsp.ret = lxc_terminal_add_fifos(handler->conf, req->data);; + + return lxc_cmd_rsp_send(fd, &rsp); +} + +struct 
lxc_cmd_set_terminal_winch_request { + unsigned int height; + unsigned int width; +}; + +int lxc_cmd_set_terminal_winch(const char *name, const char *lxcpath, unsigned int height, unsigned int width) +{ + int ret = 0, stopped = 0; + struct lxc_cmd_set_terminal_winch_request data = { 0 }; + + data.height = height; + data.width = width; + + struct lxc_cmd_rr cmd = { + .req = { + .cmd = LXC_CMD_SET_TERMINAL_WINCH, + .datalen = sizeof(struct lxc_cmd_set_terminal_winch_request), + .data = &data, + }, + }; + + ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL); + if (ret < 0) { + ERROR("Failed to send command to container"); + return -1; + } + + if (cmd.rsp.ret != 0) { + ERROR("Command response error:%d", cmd.rsp.ret); + return -1; + } + return 0; } +static int lxc_cmd_set_terminal_winch_callback(int fd, struct lxc_cmd_req *req, + struct lxc_handler *handler, struct lxc_epoll_descr *descr) +{ + struct lxc_cmd_rsp rsp; + struct lxc_cmd_set_terminal_winch_request *data = (struct lxc_cmd_set_terminal_winch_request *)(req->data); + memset(&rsp, 0, sizeof(rsp)); + + rsp.ret = lxc_set_terminal_winsz(&handler->conf->console, data->height, data->width);; + + return lxc_cmd_rsp_send(fd, &rsp); + +} + +#endif + static int lxc_cmd_process(int fd, struct lxc_cmd_req *req, struct lxc_handler *handler, struct lxc_epoll_descr *descr) @@ -1502,12 +1545,14 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req, [LXC_CMD_UNFREEZE] = lxc_cmd_unfreeze_callback, [LXC_CMD_GET_CGROUP2_FD] = lxc_cmd_get_cgroup2_fd_callback, [LXC_CMD_GET_INIT_PIDFD] = lxc_cmd_get_init_pidfd_callback, - [LXC_CMD_GET_LIMITING_CGROUP] = lxc_cmd_get_limiting_cgroup_callback, - [LXC_CMD_GET_LIMITING_CGROUP2_FD] = lxc_cmd_get_limiting_cgroup2_fd_callback, +#ifdef HAVE_ISULAD + [LXC_CMD_SET_TERMINAL_FIFOS] = lxc_cmd_set_terminal_fifos_callback, + [LXC_CMD_SET_TERMINAL_WINCH] = lxc_cmd_set_terminal_winch_callback, +#endif }; if (req->cmd >= LXC_CMD_MAX) - return log_trace_errno(-1, EINVAL, "Invalid command id 
%d", req->cmd); + return log_error_errno(-1, ENOENT, "Undefined command id %d", req->cmd); return cb[req->cmd](fd, req, handler, descr); } diff --git a/src/lxc/commands.h b/src/lxc/commands.h index 3624a1497..aa8289d7a 100644 --- a/src/lxc/commands.h +++ b/src/lxc/commands.h @@ -38,8 +38,10 @@ typedef enum { LXC_CMD_UNFREEZE, LXC_CMD_GET_CGROUP2_FD, LXC_CMD_GET_INIT_PIDFD, - LXC_CMD_GET_LIMITING_CGROUP, - LXC_CMD_GET_LIMITING_CGROUP2_FD, +#ifdef HAVE_ISULAD + LXC_CMD_SET_TERMINAL_FIFOS, + LXC_CMD_SET_TERMINAL_WINCH, +#endif LXC_CMD_MAX, } lxc_cmd_t; @@ -61,7 +63,7 @@ struct lxc_cmd_rr { }; struct lxc_cmd_console_rsp_data { - int ptmxfd; + int masterfd; int ttynum; }; @@ -131,9 +133,11 @@ extern int lxc_cmd_add_bpf_device_cgroup(const char *name, const char *lxcpath, extern int lxc_cmd_freeze(const char *name, const char *lxcpath, int timeout); extern int lxc_cmd_unfreeze(const char *name, const char *lxcpath, int timeout); extern int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath); -extern char *lxc_cmd_get_limiting_cgroup_path(const char *name, - const char *lxcpath, - const char *subsystem); -extern int lxc_cmd_get_limiting_cgroup2_fd(const char *name, const char *lxcpath); + +#ifdef HAVE_ISULAD +extern int lxc_cmd_set_terminal_fifos(const char *name, const char *lxcpath, + const char *in_fifo, const char *out_fifo, const char *err_fifo); +extern int lxc_cmd_set_terminal_winch(const char *name, const char *lxcpath, unsigned int height, unsigned int width); +#endif #endif /* __commands_h */ diff --git a/src/lxc/commands_utils.c b/src/lxc/commands_utils.c index 2af722ca1..2f2670d74 100644 --- a/src/lxc/commands_utils.c +++ b/src/lxc/commands_utils.c @@ -62,14 +62,11 @@ int lxc_cmd_sock_get_state(const char *name, const char *lxcpath, ret = lxc_cmd_add_state_client(name, lxcpath, states, &state_client_fd); if (ret < 0) - return ret_errno(EINVAL); + return -1; if (ret < MAX_STATE) return ret; - if (state_client_fd < 0) - return ret_errno(EBADF); - 
return lxc_cmd_sock_rcv_state(state_client_fd, timeout); } diff --git a/src/lxc/compiler.h b/src/lxc/compiler.h index 114fb81ba..92cd9fd14 100644 --- a/src/lxc/compiler.h +++ b/src/lxc/compiler.h @@ -57,22 +57,4 @@ #define __cgfsng_ops -/* access attribute */ -#define __access_r(x, y) -#define __access_w(x, y) -#define __access_rw(x, y) - -#ifdef __has_attribute -#if __has_attribute(access) -#undef __access_r -#define __access_r(x, y) __attribute__((access(read_only, x, y))) - -#undef __access_w -#define __access_w(x, y) __attribute__((access(write_only, x, y))) - -#undef __access_rw -#define __access_rw(x, y) __attribute__((access(read_write, x, y))) -#endif -#endif - #endif /* __LXC_COMPILER_H */ diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 00789961c..0744c19b3 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -33,6 +33,11 @@ #include #include +#ifdef HAVE_ISULAD +#include +#include "sync.h" +#endif + #include "af_unix.h" #include "caps.h" #include "cgroup.h" @@ -51,15 +56,18 @@ #include "namespace.h" #include "network.h" #include "parse.h" -#include "process_utils.h" +#include "raw_syscalls.h" #include "ringbuf.h" #include "start.h" #include "storage.h" #include "storage/overlay.h" #include "syscall_wrappers.h" #include "terminal.h" +#include "loop.h" #include "utils.h" #include "uuid.h" +#include "path.h" +#include "utils.h" #ifdef MAJOR_IN_MKDEV #include @@ -118,7 +126,14 @@ char *lxchook_names[NUM_LXC_HOOKS] = { "post-stop", "clone", "destroy", +#ifdef HAVE_ISULAD + "start-host", + "oci-prestart", + "oci-poststart", + "oci-poststop" +#else "start-host" +#endif }; struct mount_opt { @@ -637,8 +652,13 @@ static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_ha { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL }, { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL }, { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, 
"proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL }, + #ifdef HAVE_ISULAD + { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL }, + { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY|MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL }, + #else { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL }, { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL }, + #endif { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys", "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL }, { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys", "%r/sys", NULL, MS_BIND, NULL }, { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL }, @@ -670,6 +690,13 @@ static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_ha if (!destination) return -1; +#ifdef HAVE_ISULAD + if (mkdir_p(destination, 0755) < 0) { + SYSERROR("Failed to create mount target '%s'", destination); + return log_error(-1, "Failed to mkdir destination %s", destination); + } +#endif + mflags = add_required_remount_flags(source, destination, default_mounts[i].flags); r = safe_mount(source, destination, default_mounts[i].fstype, @@ -901,13 +928,11 @@ static int lxc_setup_ttys(struct lxc_conf *conf) return 0; } -define_cleanup_function(struct lxc_tty_info *, lxc_delete_tty); - int lxc_allocate_ttys(struct lxc_conf *conf) { - struct lxc_terminal_info *tty_new = NULL; + __do_free struct lxc_terminal_info *tty_new = NULL; int ret; - call_cleaner(lxc_delete_tty) struct lxc_tty_info *ttys = &conf->ttys; + struct lxc_tty_info *ttys = &conf->ttys; /* no tty in the configuration */ if (ttys->max == 0) @@ -921,39 +946,41 @@ int lxc_allocate_ttys(struct lxc_conf *conf) for (size_t i = 0; i < ttys->max; i++) { struct lxc_terminal_info *tty = &ttys->tty[i]; - tty->ptmx = -EBADF; - tty->pts = -EBADF; - ret = openpty(&tty->ptmx, &tty->pts, NULL, NULL, NULL); + 
tty->master = -EBADF; + tty->slave = -EBADF; + ret = openpty(&tty->master, &tty->slave, NULL, NULL, NULL); if (ret < 0) { ttys->max = i; + lxc_delete_tty(ttys); return log_error_errno(-ENOTTY, ENOTTY, "Failed to create tty %zu", i); } - ret = ttyname_r(tty->pts, tty->name, sizeof(tty->name)); + ret = ttyname_r(tty->slave, tty->name, sizeof(tty->name)); if (ret < 0) { ttys->max = i; - return log_error_errno(-ENOTTY, ENOTTY, "Failed to retrieve name of tty %zu pts", i); + lxc_delete_tty(ttys); + return log_error_errno(-ENOTTY, ENOTTY, "Failed to retrieve name of tty %zu slave", i); } - DEBUG("Created tty \"%s\" with ptmx fd %d and pts fd %d", - tty->name, tty->ptmx, tty->pts); + DEBUG("Created tty \"%s\" with master fd %d and slave fd %d", + tty->name, tty->master, tty->slave); /* Prevent leaking the file descriptors to the container */ - ret = fd_cloexec(tty->ptmx, true); + ret = fd_cloexec(tty->master, true); if (ret < 0) - SYSWARN("Failed to set FD_CLOEXEC flag on ptmx fd %d of tty device \"%s\"", - tty->ptmx, tty->name); + SYSWARN("Failed to set FD_CLOEXEC flag on master fd %d of tty device \"%s\"", + tty->master, tty->name); - ret = fd_cloexec(tty->pts, true); + ret = fd_cloexec(tty->slave, true); if (ret < 0) - SYSWARN("Failed to set FD_CLOEXEC flag on pts fd %d of tty device \"%s\"", - tty->pts, tty->name); + SYSWARN("Failed to set FD_CLOEXEC flag on slave fd %d of tty device \"%s\"", + tty->slave, tty->name); tty->busy = -1; } INFO("Finished creating %zu tty devices", ttys->max); - move_ptr(ttys); + ttys->tty = move_ptr(tty_new); return 0; } @@ -964,8 +991,8 @@ void lxc_delete_tty(struct lxc_tty_info *ttys) for (int i = 0; i < ttys->max; i++) { struct lxc_terminal_info *tty = &ttys->tty[i]; - close_prot_errno_disarm(tty->ptmx); - close_prot_errno_disarm(tty->pts); + close_prot_errno_disarm(tty->master); + close_prot_errno_disarm(tty->slave); } free_disarm(ttys->tty); @@ -986,15 +1013,15 @@ static int lxc_send_ttys_to_parent(struct lxc_handler *handler) int 
ttyfds[2]; struct lxc_terminal_info *tty = &ttys->tty[i]; - ttyfds[0] = tty->ptmx; - ttyfds[1] = tty->pts; + ttyfds[0] = tty->master; + ttyfds[1] = tty->slave; ret = lxc_abstract_unix_send_fds(sock, ttyfds, 2, NULL, 0); if (ret < 0) break; - TRACE("Sent tty \"%s\" with ptmx fd %d and pts fd %d to parent", - tty->name, tty->ptmx, tty->pts); + TRACE("Sent tty \"%s\" with master fd %d and slave fd %d to parent", + tty->name, tty->master, tty->slave); } if (ret < 0) @@ -1047,8 +1074,13 @@ on_error: /* Just create a path for /dev under $lxcpath/$name and in rootfs If we hit an * error, log it but don't fail yet. */ +#ifdef HAVE_ISULAD +static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, + int autodevtmpfssize, const char *lxcpath, char *systemd) +#else static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, int autodevtmpfssize, const char *lxcpath) +#endif { __do_free char *path = NULL; int ret; @@ -1061,6 +1093,7 @@ static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, /* $(rootfs->mount) + "/dev/pts" + '\0' */ clen = (rootfs->path ? strlen(rootfs->mount) : 0) + 9; path = must_realloc(NULL, clen); + sprintf(mount_options, "size=%d,mode=755", (autodevtmpfssize != 0) ? autodevtmpfssize : 500000); DEBUG("Using mount options: %s", mount_options); @@ -1076,6 +1109,23 @@ static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, goto reset_umask; } +#ifdef HAVE_ISULAD + if (systemd != NULL && !strcmp(systemd, "true")) { + ret = mount(path, path, "", MS_BIND, NULL); + if (ret < 0) { + SYSERROR("Failed to bind mount path \"%s\"", path); + goto reset_umask; + } + } else { + ret = safe_mount("none", path, "tmpfs", 0, mount_options, + rootfs->path ? rootfs->mount : NULL); + if (ret < 0) { + SYSERROR("Failed to mount tmpfs on \"%s\"", path); + goto reset_umask; + } + TRACE("Mounted tmpfs on \"%s\"", path); + } +#else ret = safe_mount("none", path, "tmpfs", 0, mount_options, rootfs->path ? 
rootfs->mount : NULL ); if (ret < 0) { @@ -1083,6 +1133,7 @@ static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, goto reset_umask; } TRACE("Mounted tmpfs on \"%s\"", path); +#endif ret = snprintf(path, clen, "%s/dev/pts", rootfs->path ? rootfs->mount : ""); if (ret < 0 || (size_t)ret >= clen) { @@ -1223,16 +1274,118 @@ static int lxc_fill_autodev(const struct lxc_rootfs *rootfs) return 0; } +static void null_endofword(char *word) +{ + while (*word && *word != ' ' && *word != '\t') + word++; + *word = '\0'; +} + +/* skip @nfields spaces in @src */ +static char *get_field(char *src, int nfields) +{ + int i; + char *p = src; + + for (i = 0; i < nfields; i++) { + while (*p && *p != ' ' && *p != '\t') + p++; + + if (!*p) + break; + + p++; + } + + return p; +} + +#ifdef HAVE_ISULAD +static int rootfs_parent_mount_private(char *rootfs) +{ + /* walk /proc/self/mountinfo and change parent of rootfs to private */ + FILE *f = fopen("/proc/self/mountinfo", "r"); + char *line = NULL; + char *parent = NULL, *options = NULL; + size_t len = 0; + int ret = 0; + + if (!f) { + SYSERROR("Failed to open /proc/self/mountinfo to make parent of rootfs to private"); + return -1; + } + + while (getline(&line, &len, f) != -1) { + char *target = NULL; + char *opts = NULL; + char *tmptarget = NULL; + target = get_field(line, 4); + if (!target) + continue; + tmptarget = safe_strdup(target); + null_endofword(tmptarget); + if (!strstr(rootfs, tmptarget)) { + free(tmptarget); + continue; + } + if (!parent || strlen(tmptarget) > strlen(parent)) { + free(parent); + parent = tmptarget; + } else { + free(tmptarget); + continue; + } + opts = get_field(target, 2); + if (!opts) + continue; + null_endofword(opts); + free(options); + options = safe_strdup(opts); + } + + if (!parent || !options) { + ERROR("Could not find parent mount of %s", rootfs); + ret = -1; + } else { + if (strstr(options, "shared")) { + if (mount(NULL, parent, NULL, MS_PRIVATE, NULL)) { + SYSERROR("Failed to 
make %s private", parent); + ret = -1; + } + DEBUG("Mounted parent %s of rootfs %s to private", parent, rootfs); + } + } + free(parent); + free(options); + fclose(f); + free(line); + return ret; +} +#endif + static int lxc_mount_rootfs(struct lxc_conf *conf) { int ret; struct lxc_storage *bdev; - const struct lxc_rootfs *rootfs = &conf->rootfs; + struct lxc_rootfs *rootfs = &conf->rootfs; + +#ifdef HAVE_ISULAD + unsigned long flags, mntflags, pflags; + char *mntdata = NULL; +#endif if (!rootfs->path) { ret = mount("", "/", NULL, MS_SLAVE | MS_REC, 0); if (ret < 0) - return log_error_errno(-1, errno, "Failed to recursively turn root mount tree into dependent mount"); + return log_error_errno(-1, errno, "Failed to remount \"/\" MS_REC | MS_SLAVE"); +#ifdef HAVE_ISULAD + if (!access(rootfs->mount, F_OK)) { + rootfs->path = safe_strdup("/"); + if (mount("/", rootfs->mount, NULL, MS_BIND, 0)) { + return log_error_errno(-1, errno, "Failed to mount \"/\" to %s", rootfs->mount); + } + } +#endif return 0; } @@ -1242,6 +1395,44 @@ static int lxc_mount_rootfs(struct lxc_conf *conf) return log_error_errno(-1, errno, "Failed to access to \"%s\". Check it is present", rootfs->mount); +#ifdef HAVE_ISULAD + // Support mount propagations of rootfs + // Get rootfs mnt propagation options, such as slave or shared + if (parse_mntopts(conf->rootfs.options, &mntflags, &pflags, &mntdata) < 0) { + free(mntdata); + return -1; + } + free(mntdata); + + flags = MS_SLAVE | MS_REC; + if (pflags) + flags = pflags; + + /* Mount propagation inside container can not greater than host. + * So we must change propagation of root according to flags, default is rslave. + * That means shared propagation inside container is disabled by default. 
+ */ + ret = mount("", "/", NULL, flags, NULL); + if (ret < 0) { + return log_error_errno(-1, errno, "Failed to make / to propagation flags %lu.", flags); + } + + /* Make parent mount private to make sure following bind mount does + * not propagate in other namespaces. Also it will help with kernel + * check pass in pivot_root. (IS_SHARED(new_mnt->mnt_parent)) + */ + ret = rootfs_parent_mount_private(conf->rootfs.mount); + if (ret != 0) { + return log_error(-1, "Failed to make parent of rootfs %s to private.", conf->rootfs.mount); + } + + ret = mount(conf->rootfs.mount, conf->rootfs.mount, "bind", MS_BIND | MS_REC, NULL); + if (ret < 0) { + SYSERROR("Failed to mount rootfs %s", conf->rootfs.mount); + return -1; + } +#endif + bdev = storage_init(conf); if (!bdev) return log_error(-1, "Failed to mount rootfs \"%s\" onto \"%s\" with options \"%s\"", @@ -1262,6 +1453,158 @@ static int lxc_mount_rootfs(struct lxc_conf *conf) return 0; } +#ifdef HAVE_ISULAD +// maskPath masks the top of the specified path inside a container to avoid +// security issues from processes reading information from non-namespace aware +// mounts ( proc/kcore ). 
+static bool mask_path(const char *path) +{ + int ret; + + if (!path) + return true; + + ret = mount("/dev/null", path, "", MS_BIND, ""); + if (ret < 0 && errno != ENOENT) { + if (errno == ENOTDIR) { + ret = mount("tmpfs", path, "tmpfs", MS_RDONLY, ""); + if (ret < 0) + goto error; + return true; + } + goto error; + } + return true; + +error: + SYSERROR("Failed to mask path \"%s\": %s", path, strerror(errno)); + return false; +} + +#ifdef HAVE_ISULAD +static bool remount_readwrite(const char *path) +{ + int ret, i; + + if (!path) + return true; + + for (i = 0; i < 5; i++) { + ret = mount("", path, "", MS_REMOUNT, ""); + if (ret < 0 && errno != ENOENT) { + if (errno == EINVAL) { + // Probably not a mountpoint, use bind-mount + ret = mount(path, path, "", MS_BIND, ""); + if (ret < 0) + goto on_error; + ret = mount(path, path, "", MS_BIND | MS_REMOUNT | MS_REC | \ + MS_NOEXEC | MS_NOSUID | MS_NODEV, ""); + if (ret < 0) + goto on_error; + } else if (errno == EBUSY) { + DEBUG("Try to mount \"%s\" to readonly after 100ms.", path); + usleep(100 * 1000); + continue; + } else { + goto on_error; + } + } + return true; + } + +on_error: + SYSERROR("Unable to mount \"%s\" to readwrite", path); + return false; +} + +static int remount_proc_sys_mount_entries(struct lxc_list *mount_list, bool lsm_aa_allow_nesting) +{ + char buf[4096]; + FILE *file; + struct mntent mntent; + + file = make_anonymous_mount_file(mount_list, lsm_aa_allow_nesting); + if (!file) + return -1; + + while (getmntent_r(file, &mntent, buf, sizeof(buf))) { + if (strstr(mntent.mnt_dir, "proc/sys") == NULL) { + continue; + } + + if (!remount_readwrite((const char*)mntent.mnt_dir)) { + fclose(file); + return -1; + } + } + + fclose(file); + return 0; +} +#endif + +// remount_readonly will bind over the top of an existing path and ensure that it is read-only. 
+static bool remount_readonly(const char *path) +{ + int ret, i; + + if (!path) + return true; + + for (i = 0; i < 5; i++) { + ret = mount("", path, "", MS_REMOUNT | MS_RDONLY, ""); + if (ret < 0 && errno != ENOENT) { + if (errno == EINVAL) { + // Probably not a mountpoint, use bind-mount + ret = mount(path, path, "", MS_BIND, ""); + if (ret < 0) + goto on_error; + ret = mount(path, path, "", MS_BIND | MS_REMOUNT | MS_RDONLY | MS_REC | \ + MS_NOEXEC | MS_NOSUID | MS_NODEV, ""); + if (ret < 0) + goto on_error; + } else if (errno == EBUSY) { + DEBUG("Try to mount \"%s\" to readonly after 100ms.", path); + usleep(100 * 1000); + continue; + } else { + goto on_error; + } + } + return true; + } + +on_error: + SYSERROR("Unable to mount \"%s\" to readonly", path); + return false; +} + +// isulad: setup rootfs masked paths +static int setup_rootfs_maskedpaths(struct lxc_list *maskedpaths) +{ + struct lxc_list *it; + + lxc_list_for_each(it, maskedpaths) { + if (!mask_path((char *)it->elem)) + return -1; + } + + return 0; +} +// isulad: setup rootfs ro paths +static int setup_rootfs_ropaths(struct lxc_list *ropaths) +{ + struct lxc_list *it; + + lxc_list_for_each(it, ropaths) { + if (!remount_readonly((char *)it->elem)) + return -1; + } + + return 0; +} +#endif + int lxc_chroot(const struct lxc_rootfs *rootfs) { __do_free char *nroot = NULL; @@ -1409,12 +1752,12 @@ static int lxc_pivot_root(const char *rootfs) if (ret < 0) return log_error_errno(-1, errno, "Failed to enter old root directory"); - /* Make oldroot a depedent mount to make sure our umounts don't propagate to the + /* Make oldroot rslave to make sure our umounts don't propagate to the * host. 
*/ ret = mount("", ".", "", MS_SLAVE | MS_REC, NULL); if (ret < 0) - return log_error_errno(-1, errno, "Failed to recursively turn old root mount tree into dependent mount"); + return log_error_errno(-1, errno, "Failed to make oldroot rslave"); ret = umount2(".", MNT_DETACH); if (ret < 0) @@ -1575,28 +1918,21 @@ static int setup_personality(int persona) return 0; } -static inline bool wants_console(const struct lxc_terminal *terminal) -{ - return !terminal->path || strcmp(terminal->path, "none"); -} - static int lxc_setup_dev_console(const struct lxc_rootfs *rootfs, - const struct lxc_terminal *console, - int pts_mnt_fd) + const struct lxc_terminal *console) { int ret; char path[PATH_MAX]; char *rootfs_path = rootfs->path ? rootfs->mount : ""; - if (!wants_console(console)) + if (console->path && !strcmp(console->path, "none")) return 0; ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs_path); if (ret < 0 || (size_t)ret >= sizeof(path)) return -1; - /* - * When we are asked to setup a console we remove any previous + /* When we are asked to setup a console we remove any previous * /dev/console bind-mounts. */ if (file_exists(path)) { @@ -1607,49 +1943,39 @@ static int lxc_setup_dev_console(const struct lxc_rootfs *rootfs, DEBUG("Cleared all (%d) mounts from \"%s\"", ret, path); } - /* - * For unprivileged containers autodev or automounts will already have + /* For unprivileged containers autodev or automounts will already have * taken care of creating /dev/console. 
*/ ret = mknod(path, S_IFREG | 0000, 0); if (ret < 0 && errno != EEXIST) return log_error_errno(-errno, errno, "Failed to create console"); - ret = fchmod(console->pts, S_IXUSR | S_IXGRP); +#ifdef HAVE_ISULAD + if (console->slave > 0) { +#endif + ret = fchmod(console->slave, S_IXUSR | S_IXGRP); if (ret < 0) return log_error_errno(-errno, errno, "Failed to set mode \"0%o\" to \"%s\"", S_IXUSR | S_IXGRP, console->name); - if (pts_mnt_fd >= 0) { - ret = move_mount(pts_mnt_fd, "", -EBADF, path, MOVE_MOUNT_F_EMPTY_PATH); - if (!ret) { - DEBUG("Moved mount \"%s\" onto \"%s\"", console->name, path); - goto finish; - } - - if (ret && errno != ENOSYS) - return log_error_errno(-1, errno, - "Failed to mount %d(%s) on \"%s\"", - pts_mnt_fd, console->name, path); - } - ret = safe_mount(console->name, path, "none", MS_BIND, 0, rootfs_path); if (ret < 0) - return log_error_errno(-1, errno, "Failed to mount %d(%s) on \"%s\"", pts_mnt_fd, console->name, path); - -finish: - DEBUG("Mounted pts device %d(%s) onto \"%s\"", pts_mnt_fd, console->name, path); + return log_error_errno(-1, errno, "Failed to mount \"%s\" on \"%s\"", console->name, path); +#ifdef HAVE_ISULAD + } +#endif + DEBUG("Mounted pts device \"%s\" onto \"%s\"", console->name, path); return 0; } static int lxc_setup_ttydir_console(const struct lxc_rootfs *rootfs, const struct lxc_terminal *console, - char *ttydir, int pts_mnt_fd) + char *ttydir) { int ret; char path[PATH_MAX], lxcpath[PATH_MAX]; char *rootfs_path = rootfs->path ? 
rootfs->mount : ""; - if (!wants_console(console)) + if (console->path && !strcmp(console->path, "none")) return 0; /* create rootfs/dev/ directory */ @@ -1686,30 +2012,22 @@ static int lxc_setup_ttydir_console(const struct lxc_rootfs *rootfs, if (ret < 0 && errno != EEXIST) return log_error_errno(-errno, errno, "Failed to create console"); - ret = fchmod(console->pts, S_IXUSR | S_IXGRP); +#ifdef HAVE_ISULAD + if (console->slave > 0) { +#endif + ret = fchmod(console->slave, S_IXUSR | S_IXGRP); if (ret < 0) return log_error_errno(-errno, errno, "Failed to set mode \"0%o\" to \"%s\"", S_IXUSR | S_IXGRP, console->name); /* bind mount console->name to '/dev//console' */ - if (pts_mnt_fd >= 0) { - ret = move_mount(pts_mnt_fd, "", -EBADF, lxcpath, MOVE_MOUNT_F_EMPTY_PATH); - if (!ret) { - DEBUG("Moved mount \"%s\" onto \"%s\"", console->name, lxcpath); - goto finish; - } - - if (ret && errno != ENOSYS) - return log_error_errno(-1, errno, - "Failed to mount %d(%s) on \"%s\"", - pts_mnt_fd, console->name, lxcpath); - } - ret = safe_mount(console->name, lxcpath, "none", MS_BIND, 0, rootfs_path); if (ret < 0) - return log_error_errno(-1, errno, "Failed to mount %d(%s) on \"%s\"", pts_mnt_fd, console->name, lxcpath); + return log_error_errno(-1, errno, "Failed to mount \"%s\" on \"%s\"", console->name, lxcpath); DEBUG("Mounted \"%s\" onto \"%s\"", console->name, lxcpath); +#ifdef HAVE_ISULAD + } +#endif -finish: /* bind mount '/dev//console' to '/dev/console' */ ret = safe_mount(lxcpath, path, "none", MS_BIND, 0, rootfs_path); if (ret < 0) @@ -1721,16 +2039,51 @@ finish: } static int lxc_setup_console(const struct lxc_rootfs *rootfs, - const struct lxc_terminal *console, char *ttydir, - int pts_mnt_fd) + const struct lxc_terminal *console, char *ttydir) { if (!ttydir) - return lxc_setup_dev_console(rootfs, console, pts_mnt_fd); + return lxc_setup_dev_console(rootfs, console); - return lxc_setup_ttydir_console(rootfs, console, ttydir, pts_mnt_fd); + return 
lxc_setup_ttydir_console(rootfs, console, ttydir); } +#ifdef HAVE_ISULAD +static void parse_mntopt(char *opt, unsigned long *mflags, unsigned long *pflags, char **data, size_t size) +{ + struct mount_opt *mo; + + /* If opt is found in mount_opt, set or clear flags. + * Otherwise append it to data. */ + + for (mo = &mount_opt[0]; mo->name != NULL; mo++) { + if (strncmp(opt, mo->name, strlen(mo->name)) == 0) { + if (mo->clear) + *mflags &= ~mo->flag; + else + *mflags |= mo->flag; + return; + } + } + + /* If opt is found in propagation_opt, set or clear flags. */ + for (mo = &propagation_opt[0]; mo->name != NULL; mo++) { + if (strncmp(opt, mo->name, strlen(mo->name)) != 0) + continue; + + if (mo->clear) + *pflags &= ~mo->flag; + else + *pflags |= mo->flag; + + return; + } + + if (strlen(*data)) + (void)strlcat(*data, ",", size); + (void)strlcat(*data, opt, size); +} +#else static int parse_mntopt(char *opt, unsigned long *flags, char **data, size_t size) { ssize_t ret; @@ -1767,7 +2120,43 @@ static int parse_mntopt(char *opt, unsigned long *flags, char **data, size_t siz return 0; } +#endif + +#ifdef HAVE_ISULAD +int parse_mntopts(const char *mntopts, unsigned long *mntflags, unsigned long *pflags, char **mntdata) +{ + char *data, *p, *s; + size_t size; + + *mntdata = NULL; + *mntflags = 0L; + *pflags = 0L; + if (!mntopts) + return 0; + + s = safe_strdup(mntopts); + + size = strlen(s) + 1; + data = malloc(size); + if (!data) { + free(s); + return -1; + } + *data = 0; + + lxc_iterate_parts(p, s, ",") + parse_mntopt(p, mntflags, pflags, &data, size); + + if (*data) + *mntdata = data; + else + free(data); + free(s); + + return 0; +} +#else int parse_mntopts(const char *mntopts, unsigned long *mntflags, char **mntdata) { __do_free char *mntopts_new = NULL, *mntopts_dup = NULL; @@ -1798,6 +2187,7 @@ int parse_mntopts(const char *mntopts, unsigned long *mntflags, char **mntdata) return 0; } +#endif static void parse_propagationopt(char *opt, unsigned long *flags) { @@ 
-1836,43 +2226,17 @@ int parse_propagationopts(const char *mntopts, unsigned long *pflags) return 0; } -static void null_endofword(char *word) +static int mount_entry(const char *fsname, const char *target, + const char *fstype, unsigned long mountflags, + unsigned long pflags, const char *data, bool optional, + bool dev, bool relative, const char *rootfs) { - while (*word && *word != ' ' && *word != '\t') - word++; - *word = '\0'; -} - -/* skip @nfields spaces in @src */ -static char *get_field(char *src, int nfields) -{ - int i; - char *p = src; - - for (i = 0; i < nfields; i++) { - while (*p && *p != ' ' && *p != '\t') - p++; - - if (!*p) - break; - - p++; - } - - return p; -} - -static int mount_entry(const char *fsname, const char *target, - const char *fstype, unsigned long mountflags, - unsigned long pflags, const char *data, bool optional, - bool dev, bool relative, const char *rootfs) -{ - int ret; - char srcbuf[PATH_MAX]; - const char *srcpath = fsname; -#ifdef HAVE_STATVFS - struct statvfs sb; -#endif + int ret; + char srcbuf[PATH_MAX]; + const char *srcpath = fsname; +#ifdef HAVE_STATVFS + struct statvfs sb; +#endif if (relative) { ret = snprintf(srcbuf, sizeof(srcbuf), "%s/%s", rootfs ? rootfs : "/", fsname ? 
fsname : ""); @@ -2010,8 +2374,15 @@ static int mount_entry_create_dir_file(const struct mntent *mntent, if (hasmntopt(mntent, "create=dir")) { ret = mkdir_p(path, 0755); +#ifdef HAVE_ISULAD + if (ret < 0 && errno != EEXIST) { + lxc_write_error_message(rootfs->errfd, "%s:%d: mkdir %s: %s.", __FILE__, __LINE__, path, strerror(errno)); + return log_error_errno(-1, errno, "Failed to create directory \"%s\"", path); + } +#else if (ret < 0 && errno != EEXIST) return log_error_errno(-1, errno, "Failed to create directory \"%s\"", path); +#endif } if (!hasmntopt(mntent, "create=file")) @@ -2028,16 +2399,184 @@ static int mount_entry_create_dir_file(const struct mntent *mntent, p2 = dirname(p1); ret = mkdir_p(p2, 0755); +#ifdef HAVE_ISULAD + if (ret < 0 && errno != EEXIST) { + lxc_write_error_message(rootfs->errfd, "%s:%d: mkdir %s: %s.", __FILE__, __LINE__, path, strerror(errno)); + return log_error_errno(-1, errno, "Failed to create directory \"%s\"", path); + } +#else if (ret < 0 && errno != EEXIST) return log_error_errno(-1, errno, "Failed to create directory \"%s\"", path); +#endif ret = mknod(path, S_IFREG | 0000, 0); +#ifdef HAVE_ISULAD + if (ret < 0 && errno != EEXIST) { + lxc_write_error_message(rootfs->errfd, "%s:%d: open %s: %s.", __FILE__, __LINE__, path, strerror(errno)); + return -errno; + } +#else if (ret < 0 && errno != EEXIST) return -errno; +#endif + + return 0; +} + +#ifdef HAVE_ISULAD +static int mount_entry_with_loop_dev(const char *src, const char *dest, const char *fstype, + char *mnt_opts, const char *rootfs) +{ + int srcfd = -1, destfd, ret, saved_errno; + char srcbuf[50], destbuf[50]; // only needs enough for /proc/self/fd/ + const char *mntsrc = src; + int max_retry = 5; + struct lxc_storage loop; + + if (!rootfs) + rootfs = ""; + + /* todo - allow symlinks for relative paths if 'allowsymlinks' option is passed */ + if (src && src[0] != '/') { + INFO("this is a relative mount"); + srcfd = open_without_symlink(src, NULL); + if (srcfd < 0) + return 
srcfd; + ret = snprintf(srcbuf, sizeof(srcbuf), "/proc/self/fd/%d", srcfd); + if (ret < 0 || ret > sizeof(srcbuf)) { + close(srcfd); + ERROR("Failed to print string"); + return -EINVAL; + } + mntsrc = srcbuf; + } + + destfd = open_without_symlink(dest, rootfs); + if (destfd < 0) { + if (srcfd != -1) { + saved_errno = errno; + close(srcfd); + errno = saved_errno; + } + return destfd; + } + + ret = snprintf(destbuf, sizeof(destbuf), "/proc/self/fd/%d", destfd); + if (ret < 0 || ret > sizeof(destbuf)) { + if (srcfd != -1) + close(srcfd); + close(destfd); + ERROR("Out of memory"); + return -EINVAL; + } + +retry: + loop.src = (char *)mntsrc; + loop.dest = destbuf; + loop.mntopts = mnt_opts; + loop.type = "loop"; + loop.lofd = -1; + ret = loop_mount(&loop); + if (ret < 0) { + /* If loop is used by other program, mount may fail. So + * we do retry to ensure mount ok */ + if (max_retry > 0) { + max_retry--; + DEBUG("mount entry with loop dev failed, retry mount." + "retry count left %d", max_retry); + goto retry; + } + } + if (loop.lofd != -1) + close(loop.lofd); + if (srcfd != -1) + close(srcfd); + close(destfd); + if (ret < 0) { + SYSERROR("Failed to mount %s onto %s", src, dest); + return ret; + } return 0; } +/* isulad: checkMountDestination checks to ensure that the mount destination is not over the top of /proc. + * dest is required to be an abs path and have any symlinks resolved before calling this function. */ +static int check_mount_destination(const char *rootfs, const char *dest) +{ + const char *invalid_destinations[] = { + "/proc", + NULL + }; + // White list, it should be sub directories of invalid destinations + const char *valid_destinations[] = { + // These entries can be bind mounted by files emulated by fuse, + // so commands like top, free displays stats in container. 
+ "/proc/cpuinfo", + "/proc/diskstats", + "/proc/meminfo", + "/proc/stat", + "/proc/swaps", + "/proc/uptime", + "/proc/net/dev", + NULL + }; + const char **valid = NULL; + const char **invalid = NULL; + + for(valid = valid_destinations; *valid != NULL; valid++) { + char *fullpath = NULL; + char *relpath = NULL; + const char *parts[3] = { + rootfs, + *valid, + NULL + }; + fullpath = lxc_string_join("/", parts, false); + if (!fullpath) { + ERROR("Out of memory"); + return -1; + } + relpath = path_relative(fullpath, dest); + free(fullpath); + if (!relpath) + return -1; + if (!strcmp(relpath, ".")) { + free(relpath); + return 0; + } + free(relpath); + } + + for(invalid = invalid_destinations; *invalid != NULL; invalid++) { + char *fullpath = NULL; + char *relpath = NULL; + const char *parts[3] = { + rootfs, + *invalid, + NULL + }; + fullpath = lxc_string_join("/", parts, false); + if (!fullpath) { + ERROR("Out of memory"); + return -1; + } + relpath = path_relative(fullpath, dest); + free(fullpath); + if (!relpath) + return -1; + if (!strcmp(relpath, ".") || strncmp(relpath, "..", 2)) { + ERROR("%s cannot be mounted because it is located inside %s", dest, *invalid); + free(relpath); + return -1; + } + free(relpath); + } + + return 0; +} +#endif + /* rootfs, lxc_name, and lxc_path can be NULL when the container is created * without a rootfs. 
*/ static inline int mount_entry_on_generic(struct mntent *mntent, @@ -2051,6 +2590,11 @@ static inline int mount_entry_on_generic(struct mntent *mntent, char *rootfs_path = NULL; int ret; bool dev, optional, relative; + const char *dest = path; + +#ifdef HAVE_ISULAD + char *rpath = NULL; +#endif optional = hasmntopt(mntent, "optional") != NULL; dev = hasmntopt(mntent, "dev") != NULL; @@ -2059,9 +2603,38 @@ static inline int mount_entry_on_generic(struct mntent *mntent, if (rootfs && rootfs->path) rootfs_path = rootfs->mount; - ret = mount_entry_create_dir_file(mntent, path, rootfs, lxc_name, +#ifdef HAVE_ISULAD + // isulad: ensure that the destination of the bind mount is resolved of symlinks at mount time because + // any previous mounts can invalidate the next mount's destination. + // this can happen when a user specifies mounts within other mounts to cause breakouts or other + // evil stuff to try to escape the container's rootfs. + if (rootfs_path) { + rpath = follow_symlink_in_scope(path, rootfs_path); + if (!rpath) { + ERROR("Failed to get real path of '%s' in scope '%s'.", path, rootfs_path); + lxc_write_error_message(rootfs->errfd, "%s:%d: failed to get real path of '%s' in scope '%s'.", + __FILE__, __LINE__, path, rootfs_path); + return -1; + } + dest = rpath; + + ret = check_mount_destination(rootfs_path, dest); + if (ret) { + ERROR("Mount destination is invalid: '%s'", dest); + lxc_write_error_message(rootfs->errfd, "%s:%d: mount destination is invalid: '%s'.", + __FILE__, __LINE__, dest); + free(rpath); + return -1; + } + } +#endif + + ret = mount_entry_create_dir_file(mntent, dest, rootfs, lxc_name, lxc_path); if (ret < 0) { +#ifdef HAVE_ISULAD + free(rpath); +#endif if (optional) return 0; @@ -2069,6 +2642,29 @@ static inline int mount_entry_on_generic(struct mntent *mntent, } cull_mntent_opt(mntent); +#ifdef HAVE_ISULAD + ret = parse_mntopts(mntent->mnt_opts, &mntflags, &pflags, &mntdata); + if (ret < 0) { + free(rpath); + return -1; + } + + // 
support squashfs + if (strcmp(mntent->mnt_type, "squashfs") == 0) { + ret = mount_entry_with_loop_dev(mntent->mnt_fsname, dest, mntent->mnt_type, + mntent->mnt_opts, rootfs_path); + } else { + ret = mount_entry(mntent->mnt_fsname, dest, mntent->mnt_type, mntflags, + pflags, mntdata, optional, dev, relative, rootfs_path); + } + + if (ret < 0) { + lxc_write_error_message(rootfs->errfd, "%s:%d: failed to mount %s as type %s.", + __FILE__, __LINE__, mntent->mnt_fsname, mntent->mnt_type); + } + + free(rpath); +#else ret = parse_propagationopts(mntent->mnt_opts, &pflags); if (ret < 0) return -1; @@ -2077,8 +2673,9 @@ static inline int mount_entry_on_generic(struct mntent *mntent, if (ret < 0) return ret; - ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type, mntflags, - pflags, mntdata, optional, dev, relative, rootfs_path); + ret = mount_entry(mntent->mnt_fsname, dest, mntent->mnt_type, mntflags, + pflags, mntdata, optional, dev, relative, rootfs_path); +#endif return ret; } @@ -2169,6 +2766,28 @@ static int mount_file_entries(const struct lxc_conf *conf, while (getmntent_r(file, &mntent, buf, sizeof(buf))) { int ret; +#ifdef HAVE_ISULAD + //isulad, system contaienr, skip "proc/sys/xxx" path + if (conf->systemd != NULL && strcmp(conf->systemd, "true") == 0) { + if (strstr(mntent.mnt_dir, "proc/sys") != NULL) { + continue; + } + } + + /* Note: Workaround for volume file path with space*/ + mntent.mnt_fsname = lxc_string_replace(SPACE_MAGIC_STR, " ", mntent.mnt_fsname); + if(!mntent.mnt_fsname) { + SYSERROR("memory allocation error"); + return -1; + } + mntent.mnt_dir = lxc_string_replace(SPACE_MAGIC_STR, " ", mntent.mnt_dir); + if(!mntent.mnt_dir) { + SYSERROR("memory allocation error"); + free(mntent.mnt_fsname); + return -1; + } +#endif + if (!rootfs->path) ret = mount_entry_on_systemfs(&mntent); else if (mntent.mnt_dir[0] != '/') @@ -2177,6 +2796,14 @@ static int mount_file_entries(const struct lxc_conf *conf, else ret = 
mount_entry_on_absolute_rootfs(&mntent, rootfs, lxc_name, lxc_path); + +#ifdef HAVE_ISULAD + free(mntent.mnt_fsname); + mntent.mnt_fsname = NULL; + free(mntent.mnt_dir); + mntent.mnt_dir = NULL; +#endif + if (ret < 0) return -1; } @@ -2299,6 +2926,51 @@ static int setup_mount_entries(const struct lxc_conf *conf, return mount_file_entries(conf, rootfs, f, lxc_name, lxc_path); } +#ifdef HAVE_ISULAD +static bool have_dev_bind_mount_entry(FILE *file) +{ + bool have_bind_dev = false; + char buf[PATH_MAX]; + struct mntent mntent; + + while (getmntent_r(file, &mntent, buf, sizeof(buf))) { + mntent.mnt_dir = lxc_string_replace(SPACE_MAGIC_STR, " ", mntent.mnt_dir); + if(!mntent.mnt_dir) { + SYSERROR("memory allocation error"); + continue; + } + + if (strcmp(mntent.mnt_dir, "dev") == 0 && strcmp(mntent.mnt_type, "bind") == 0) { + have_bind_dev = true; + } + + free(mntent.mnt_dir); + mntent.mnt_dir = NULL; + + if (have_bind_dev) + return true; + } + + return false; +} + +// returns true if /dev needs to be set up. 
+static bool need_setup_dev(const struct lxc_conf *conf, struct lxc_list *mount) +{ + __do_fclose FILE *f = NULL; + + f = make_anonymous_mount_file(mount, conf->lsm_aa_allow_nesting); + if (!f) + return true; + + if (have_dev_bind_mount_entry(f)) { + return false; + } else { + return true; + } +} +#endif + static int parse_cap(const char *cap) { size_t i; @@ -2395,6 +3067,16 @@ static int dropcaps_except(struct lxc_list *caps) lxc_list_for_each (iterator, caps) { keep_entry = iterator->elem; +#ifdef HAVE_ISULAD + /* Do not keep any cap*/ + if (strcmp(keep_entry, "ISULAD_KEEP_NONE") == 0) { + DEBUG("Do not keep any capability"); + for(i = 0; i < numcaps; i++) { + caplist[i] = 0; + } + break; + } +#endif capid = parse_cap(keep_entry); if (capid == -2) continue; @@ -2443,7 +3125,11 @@ static int parse_resource(const char *res) return resid; } +#ifdef HAVE_ISULAD +int setup_resource_limits(struct lxc_list *limits, pid_t pid, int errfd) +#else int setup_resource_limits(struct lxc_list *limits, pid_t pid) +#endif { int resid; struct lxc_list *it; @@ -2457,8 +3143,17 @@ int setup_resource_limits(struct lxc_list *limits, pid_t pid) return log_error(-1, "Unknown resource %s", lim->resource); #if HAVE_PRLIMIT || HAVE_PRLIMIT64 +#if HAVE_ISULAD + if (prlimit(pid, resid, &lim->limit, NULL) != 0) { + lxc_write_error_message(errfd, "%s:%d: Failed to set limit %s %lu %lu: %s.", + __FILE__, __LINE__, lim->resource, + lim->limit.rlim_cur, lim->limit.rlim_max, strerror(errno)); + return log_error_errno(-1, errno, "Failed to set limit %s", lim->resource); + } +#else if (prlimit(pid, resid, &lim->limit, NULL) != 0) return log_error_errno(-1, errno, "Failed to set limit %s", lim->resource); +#endif TRACE("Setup \"%s\" limit", lim->resource); #else @@ -2546,10 +3241,10 @@ struct lxc_conf *lxc_conf_init(void) new->console.path = NULL; new->console.peer = -1; new->console.proxy.busy = -1; - new->console.proxy.ptmx = -1; - new->console.proxy.pts = -1; - new->console.ptmx = -1; - 
new->console.pts = -1; + new->console.proxy.master = -1; + new->console.proxy.slave = -1; + new->console.master = -1; + new->console.slave = -1; new->console.name[0] = '\0'; memset(&new->console.ringbuf, 0, sizeof(struct lxc_ringbuf)); new->maincmd_fd = -1; @@ -2601,6 +3296,27 @@ struct lxc_conf *lxc_conf_init(void) memset(&new->ns_share, 0, sizeof(char *) * LXC_NS_MAX); seccomp_conf_init(new); +#ifdef HAVE_ISULAD + lxc_list_init(&new->populate_devs); + lxc_list_init(&new->rootfs.maskedpaths); + lxc_list_init(&new->rootfs.ropaths); + new->exit_fd = -1; + new->umask = 0027; /*default umask 0027*/ + new->console.init_fifo[0] = NULL; + new->console.init_fifo[1] = NULL; + new->console.init_fifo[2] = NULL; + new->console.pipes[0][0] = -1; + new->console.pipes[0][1] = -1; + new->console.pipes[1][0] = -1; + new->console.pipes[1][1] = -1; + new->console.pipes[2][0] = -1; + new->console.pipes[2][1] = -1; + lxc_list_init(&new->console.fifos); + new->errmsg = NULL; + new->errpipe[0] = -1; + new->errpipe[1] = -1; +#endif + return new; } @@ -2716,10 +3432,19 @@ int lxc_map_ids(struct lxc_list *idmap, pid_t pid) struct id_map *map; struct lxc_list *iterator; enum idtype type; + /* strlen("new@idmap") = 9 + * + + * strlen(" ") = 1 + * + + * INTTYPE_TO_STRLEN(uint32_t) + * + + * strlen(" ") = 1 + * + * We add some additional space to make sure that we really have + * LXC_IDMAPLEN bytes available for our the {g,u]id mapping. + */ int ret = 0, gidmap = 0, uidmap = 0; - char mapbuf[STRLITERALLEN("new@idmap") + STRLITERALLEN(" ") + - INTTYPE_TO_STRLEN(pid_t) + STRLITERALLEN(" ") + - LXC_IDMAPLEN] = {0}; + char mapbuf[9 + 1 + INTTYPE_TO_STRLEN(uint32_t) + 1 + LXC_IDMAPLEN] = {0}; bool had_entry = false, use_shadow = false; int hostuid, hostgid; @@ -2828,11 +3553,11 @@ int lxc_map_ids(struct lxc_list *idmap, pid_t pid) return 0; } -/* - * Return the host uid/gid to which the container root is mapped in val. +/* Return the host uid/gid to which the container root is mapped in val. 
* Return true if id was found, false otherwise. */ -static id_t get_mapped_rootid(const struct lxc_conf *conf, enum idtype idtype) +static bool get_mapped_rootid(const struct lxc_conf *conf, enum idtype idtype, + unsigned long *val) { unsigned nsid; struct id_map *map; @@ -2849,13 +3574,11 @@ static id_t get_mapped_rootid(const struct lxc_conf *conf, enum idtype idtype) continue; if (map->nsid != nsid) continue; - return map->hostid; + *val = map->hostid; + return true; } - if (idtype == ID_TYPE_UID) - return LXC_INVALID_UID; - - return LXC_INVALID_GID; + return false; } int mapped_hostid(unsigned id, const struct lxc_conf *conf, enum idtype idtype) @@ -2896,6 +3619,129 @@ again: return freeid; } +int chown_mapped_root_exec_wrapper(void *args) +{ + execvp("lxc-usernsexec", args); + return -1; +} + +/* chown_mapped_root: for an unprivileged user with uid/gid X to + * chown a dir to subuid/subgid Y, he needs to run chown as root + * in a userns where nsid 0 is mapped to hostuid/hostgid Y, and + * nsid Y is mapped to hostuid/hostgid X. That way, the container + * root is privileged with respect to hostuid/hostgid X, allowing + * him to do the chown. 
+ */ +int chown_mapped_root(const char *path, const struct lxc_conf *conf) +{ + uid_t rootuid, rootgid; + unsigned long val; + int hostuid, hostgid, ret; + struct stat sb; + char map1[100], map2[100], map3[100], map4[100], map5[100]; + char ugid[100]; + const char *args1[] = {"lxc-usernsexec", + "-m", map1, + "-m", map2, + "-m", map3, + "-m", map5, + "--", "chown", ugid, path, + NULL}; + const char *args2[] = {"lxc-usernsexec", + "-m", map1, + "-m", map2, + "-m", map3, + "-m", map4, + "-m", map5, + "--", "chown", ugid, path, + NULL}; + char cmd_output[PATH_MAX]; + + hostuid = geteuid(); + hostgid = getegid(); + + if (!get_mapped_rootid(conf, ID_TYPE_UID, &val)) + return log_error(-1, "No uid mapping for container root"); + rootuid = (uid_t)val; + + if (!get_mapped_rootid(conf, ID_TYPE_GID, &val)) + return log_error(-1, "No gid mapping for container root"); + rootgid = (gid_t)val; + + if (hostuid == 0) { + if (chown(path, rootuid, rootgid) < 0) + return log_error(-1, "Error chowning %s", path); + + return 0; + } + + /* nothing to do */ + if (rootuid == hostuid) + return log_info(0, "Container root is our uid; no need to chown"); + + /* save the current gid of "path" */ + if (stat(path, &sb) < 0) + return log_error(-1, "Error stat %s", path); + + /* Update the path argument in case this was overlayfs. */ + args1[sizeof(args1) / sizeof(args1[0]) - 2] = path; + args2[sizeof(args2) / sizeof(args2[0]) - 2] = path; + + /* + * A file has to be group-owned by a gid mapped into the + * container, or the container won't be privileged over it. 
+ */ + DEBUG("trying to chown \"%s\" to %d", path, hostgid); + if (sb.st_uid == hostuid && + mapped_hostid(sb.st_gid, conf, ID_TYPE_GID) < 0 && + chown(path, -1, hostgid) < 0) + return log_error(-1, "Failed chgrping %s", path); + + /* "u:0:rootuid:1" */ + ret = snprintf(map1, 100, "u:0:%d:1", rootuid); + if (ret < 0 || ret >= 100) + return log_error(-1, "Error uid printing map string"); + + /* "u:hostuid:hostuid:1" */ + ret = snprintf(map2, 100, "u:%d:%d:1", hostuid, hostuid); + if (ret < 0 || ret >= 100) + return log_error(-1, "Error uid printing map string"); + + /* "g:0:rootgid:1" */ + ret = snprintf(map3, 100, "g:0:%d:1", rootgid); + if (ret < 0 || ret >= 100) + return log_error(-1, "Error gid printing map string"); + + /* "g:pathgid:rootgid+pathgid:1" */ + ret = snprintf(map4, 100, "g:%d:%d:1", (gid_t)sb.st_gid, + rootgid + (gid_t)sb.st_gid); + if (ret < 0 || ret >= 100) + return log_error(-1, "Error gid printing map string"); + + /* "g:hostgid:hostgid:1" */ + ret = snprintf(map5, 100, "g:%d:%d:1", hostgid, hostgid); + if (ret < 0 || ret >= 100) + return log_error(-1, "Error gid printing map string"); + + /* "0:pathgid" (chown) */ + ret = snprintf(ugid, 100, "0:%d", (gid_t)sb.st_gid); + if (ret < 0 || ret >= 100) + return log_error(-1, "Error owner printing format string for chown"); + + if (hostgid == sb.st_gid) + ret = run_command(cmd_output, sizeof(cmd_output), + chown_mapped_root_exec_wrapper, + (void *)args1); + else + ret = run_command(cmd_output, sizeof(cmd_output), + chown_mapped_root_exec_wrapper, + (void *)args2); + if (ret < 0) + ERROR("lxc-usernsexec failed: %s", cmd_output); + + return ret; +} + /* NOTE: Must not be called from inside the container namespace! */ int lxc_create_tmp_proc_mount(struct lxc_conf *conf) { @@ -2923,8 +3769,8 @@ void tmp_proc_unmount(struct lxc_conf *lxc_conf) lxc_conf->tmp_umount_proc = false; } -/* Walk /proc/mounts and change any shared entries to dependent mounts. 
*/ -void turn_into_dependent_mounts(void) +/* Walk /proc/mounts and change any shared entries to slave. */ +void remount_all_slave(void) { __do_free char *line = NULL; __do_fclose FILE *f = NULL; @@ -3001,12 +3847,13 @@ again: null_endofword(target); ret = mount(NULL, target, NULL, MS_SLAVE, NULL); if (ret < 0) { - SYSERROR("Failed to recursively turn old root mount tree into dependent mount. Continuing..."); + SYSERROR("Failed to make \"%s\" MS_SLAVE", target); + ERROR("Continuing..."); continue; } - TRACE("Recursively turned old root mount tree into dependent mount"); + TRACE("Remounted \"%s\" as MS_SLAVE", target); } - TRACE("Turned all mount table entries into dependent mount"); + TRACE("Remounted all mount table entries as MS_SLAVE"); } static int lxc_execute_bind_init(struct lxc_handler *handler) @@ -3082,7 +3929,13 @@ int lxc_setup_rootfs_prepare_root(struct lxc_conf *conf, const char *name, return log_trace(0, "Bind mounted container / onto itself"); } - turn_into_dependent_mounts(); +#ifdef HAVE_ISULAD + if (!conf->rootfs.options) { + remount_all_slave(); + } +#else + remount_all_slave(); +#endif ret = run_lxc_hooks(name, "pre-mount", conf, NULL); if (ret < 0) @@ -3123,7 +3976,7 @@ static bool verify_start_hooks(struct lxc_conf *conf) static bool execveat_supported(void) { - execveat(-1, "", NULL, NULL, AT_EMPTY_PATH); + lxc_raw_execveat(-1, "", NULL, NULL, AT_EMPTY_PATH); if (errno == ENOSYS) return false; @@ -3180,60 +4033,738 @@ static int lxc_setup_boot_id(void) return 0; } -int lxc_setup(struct lxc_handler *handler) +#ifdef HAVE_ISULAD +/* isulad: setup devices which will be populated in the container.*/ +static int setup_populate_devs(const struct lxc_rootfs *rootfs, struct lxc_list *devs) { - __do_close int pts_mnt_fd = -EBADF; - int ret; - const char *lxcpath = handler->lxcpath, *name = handler->name; - struct lxc_conf *lxc_conf = handler->conf; - char *keyring_context = NULL; + int ret = 0; + char *pathdirname = NULL; + char path[MAXPATHLEN]; + 
mode_t file_mode = 0; + struct lxc_populate_devs *dev_elem = NULL; + struct lxc_list *it = NULL; + mode_t cur_mask; - ret = lxc_setup_rootfs_prepare_root(lxc_conf, name, lxcpath); - if (ret < 0) - return log_error(-1, "Failed to setup rootfs"); + INFO("Populating devices into container"); + cur_mask = umask(0000); + lxc_list_for_each(it, devs) { + ret = 0; + dev_elem = it->elem; - if (handler->nsfd[LXC_NS_UTS] == -EBADF) { - ret = setup_utsname(lxc_conf->utsname); - if (ret < 0) - return log_error(-1, "Failed to setup the utsname %s", name); - } + ret = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->path ? rootfs->mount : "", dev_elem->name); + if (ret < 0 || ret >= MAXPATHLEN) { + ret = -1; + goto reset_umask; + } - if (!lxc_conf->keyring_disable_session) { - if (lxc_conf->lsm_se_keyring_context) { - keyring_context = lxc_conf->lsm_se_keyring_context; - } else if (lxc_conf->lsm_se_context) { - keyring_context = lxc_conf->lsm_se_context; + /* create any missing directories */ + pathdirname = safe_strdup(path); + pathdirname = dirname(pathdirname); + ret = mkdir_p(pathdirname, 0755); + free(pathdirname); + if (ret < 0) { + WARN("Failed to create target directory"); + ret = -1; + goto reset_umask; } - ret = lxc_setup_keyring(keyring_context); - if (ret < 0) - return -1; - } + if (!strcmp(dev_elem->type, "c")) { + file_mode = dev_elem->file_mode | S_IFCHR; + } else if (!strcmp(dev_elem->type, "b")) { + file_mode = dev_elem->file_mode | S_IFBLK; + } else { + ERROR("Failed to parse devices type '%s'", dev_elem->type); + ret = -1; + goto reset_umask; + } - if (handler->ns_clone_flags & CLONE_NEWNET) { - ret = lxc_setup_network_in_child_namespaces(lxc_conf, - &lxc_conf->network); - if (ret < 0) - return log_error(-1, "Failed to setup network"); + DEBUG("Try to mknod '%s':'%d':'%d':'%d'\n", path, + file_mode, dev_elem->maj, dev_elem->min); - ret = lxc_network_send_name_and_ifindex_to_parent(handler); - if (ret < 0) - return log_error(-1, "Failed to send network device names 
and ifindices to parent"); + ret = mknod(path, file_mode, makedev(dev_elem->maj, dev_elem->min)); + if (ret && errno != EEXIST) { + SYSERROR("Failed to mknod '%s':'%d':'%d':'%d'", dev_elem->name, + file_mode, dev_elem->maj, dev_elem->min); + + char hostpath[MAXPATHLEN]; + FILE *pathfile = NULL; + + // Unprivileged containers cannot create devices, so + // try to bind mount the device from the host + ret = snprintf(hostpath, MAXPATHLEN, "/dev/%s", dev_elem->name); + if (ret < 0 || ret >= MAXPATHLEN) { + ret = -1; + goto reset_umask; + } + pathfile = lxc_fopen(path, "wb"); + if (!pathfile) { + SYSERROR("Failed to create device mount target '%s'", path); + ret = -1; + goto reset_umask; + } + fclose(pathfile); + if (safe_mount(hostpath, path, 0, MS_BIND, NULL, + rootfs->path ? rootfs->mount : NULL) != 0) { + SYSERROR("Failed bind mounting device %s from host into container", + dev_elem->name); + ret = -1; + goto reset_umask; + } + } + if (chown(path, dev_elem->uid, dev_elem->gid) < 0) { + ERROR("Error chowning %s", path); + ret = -1; + goto reset_umask; + } + ret = 0; } - if (wants_console(&lxc_conf->console)) { - pts_mnt_fd = open_tree(-EBADF, lxc_conf->console.name, - OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_EMPTY_PATH); - if (pts_mnt_fd < 0) - SYSTRACE("Failed to create detached mount for container's console \"%s\"", - lxc_conf->console.name); - else - TRACE("Created detached mount for container's console \"%s\"", - lxc_conf->console.name); - } +reset_umask: + (void)umask(cur_mask); + + INFO("Populated devices into container /dev"); + return ret; +} + +// isulad: setup rootfs mountopts +static int setup_rootfs_mountopts(const struct lxc_rootfs *rootfs) +{ + unsigned long mflags, mntflags, pflags; + char *mntdata = NULL; + + if(!rootfs || !rootfs->options) + return 0; + + if (parse_mntopts(rootfs->options, &mntflags, &pflags, &mntdata) < 0) { + free(mntdata); + return -1; + } + free(mntdata); + + if (mntflags & MS_RDONLY) { + mflags = add_required_remount_flags("/", 
NULL, MS_BIND | MS_REC | mntflags | pflags | MS_REMOUNT); + DEBUG("remounting / as readonly"); + if (mount("/", "/", NULL, mflags, 0) < 0) { + SYSERROR("Failed to make / readonly."); + return -1; + } + } + return 0; +} + +struct oci_hook_conf { + defs_hook *ocihook; + + int errfd; + int which; +}; + +static int create_mtab_link() +{ + ssize_t ret; + int mret; + struct stat sbuf; + const char *pathname = "/proc/mounts"; + const char *slink = "/etc/mtab"; + + if (file_exists(slink)) { + return 0; + } + + ret = stat(pathname, &sbuf); + if (ret < 0) { + SYSERROR("Failed to stat %s: %s", pathname, strerror(errno)); + return -1; + } + + mret = symlink(pathname, slink); + if (mret < 0 && errno != EEXIST) { + if (errno == EROFS) { + WARN("Failed to create link %s for target %s. Read-only filesystem", slink, pathname); + } else { + SYSERROR("Failed to create \"%s\"", slink); + return -1; + } + } + + return 0; +} + +struct wait_conf { + pid_t pid; + unsigned long long startat; + int timeout; + int errfd; + int which; +}; + +static char* generate_json_str(const char *name, const char *lxcpath, const char *rootfs) +{ + char *cpid = NULL; + char *inmsg = NULL; + int rc = 0, ret = 0; + size_t size; + + if (!name || !lxcpath || !rootfs) { + ERROR("Invalid arguments"); + return NULL; + } + cpid = getenv("LXC_PID"); + if (!cpid) { + ERROR("Get container %s pid failed: %s", name, strerror(errno)); + cpid = "-1"; + } + + if ((strlen(name) + strlen(cpid) + strlen(rootfs) + strlen(lxcpath) + strlen(name)) > + SIZE_MAX - (strlen("{\"ociVersion\":\"\",\"id\":\"\",\"pid\":,\"root\":\"\",\"bundle\":\"\"}") - 1 - 1)) { + ERROR("Out of memory"); + ret = -1; + goto out_free; + } + + // {"ociVersion":"","id":"xxx","pid":777,"root":"xxx","bundle":"xxx"} + size = strlen("{\"ociVersion\":\"\",\"id\":\"\",\"pid\":,\"root\":\"\",\"bundle\":\"\"}") + + strlen(name) + strlen(cpid) + strlen(rootfs) + strlen(lxcpath) + 1 + strlen(name) + 1; + inmsg = malloc(size); + if (inmsg == NULL) { + ERROR("Out of 
memory"); + ret = -1; + goto out_free; + } + rc = snprintf(inmsg, size, + "{\"ociVersion\":\"\",\"id\":\"%s\",\"pid\":%s,\"root\":\"%s\",\"bundle\":\"%s/%s\"}", + name, cpid, rootfs, lxcpath, name); + if (rc < 0 || rc >= size) { + ERROR("Create json string failed"); + ret = -1; + } + +out_free: + if (ret) { + free(inmsg); + inmsg = NULL; + } + return inmsg; +} + +static char **merge_ocihook_env(char **oldenvs, size_t env_len, size_t *merge_env_len) +{ + char **result = NULL; + size_t result_len = env_len; + size_t i, j; + char *tmpenv = NULL; + char *lxc_envs[] = {"LD_LIBRARY_PATH", "PATH", "LXC_CGNS_AWARE", "LXC_PID", "LXC_ROOTFS_MOUNT", + "LXC_CONFIG_FILE", "LXC_CGROUP_PATH", "LXC_ROOTFS_PATH", "LXC_NAME" + }; + char *lxcenv_buf = NULL; + + if (result_len > SIZE_MAX - (sizeof(lxc_envs) / sizeof(char *)) - 1) + return NULL; + result_len += (sizeof(lxc_envs) / sizeof(char *)) + 1; + result = malloc(sizeof(char *) * result_len); + if (result == NULL) + return NULL; + memset(result, 0, sizeof(char *) * result_len); + + for(i = 0; i < env_len; i++) { + if (oldenvs[i]) + result[i] = safe_strdup(oldenvs[i]); + } + + for(j = 0; j < (sizeof(lxc_envs) / sizeof(char *)); j++) { + size_t env_buf_len = 0; + tmpenv = getenv(lxc_envs[j]); + if (tmpenv && i < (result_len - 1)) { + if (strlen(tmpenv) > (SIZE_MAX - 1 - 1 - strlen(lxc_envs[j]))) { + lxc_free_array((void **)result, free); + return NULL; + } + env_buf_len = ((strlen(tmpenv) + 1) + strlen(lxc_envs[j])) + 1; + lxcenv_buf = malloc(env_buf_len); + if (lxcenv_buf == NULL) { + lxc_free_array((void **)result, free); + return NULL; + } + if (snprintf(lxcenv_buf, env_buf_len, "%s=%s", lxc_envs[j], tmpenv) < 0) { + free(lxcenv_buf); + continue; + } + result[i++] = lxcenv_buf; + lxcenv_buf = NULL; + } + } + + *merge_env_len = i; + return result; +} + +static struct lxc_popen_FILE *lxc_popen_ocihook(const char *commandpath, char **args, int args_len, + char **envs, int env_len, const char *instr) +{ + int ret; + struct 
lxc_popen_FILE *fp = NULL; + int pipe_fds[2] = {-1, -1}; + int pipe_msg[2] = {-1, -1}; + pid_t child_pid; + + ret = pipe2(pipe_fds, O_CLOEXEC | O_NONBLOCK); + if (ret < 0) + return NULL; + + ret = pipe2(pipe_msg, O_CLOEXEC | O_NONBLOCK); + if (ret < 0) { + ERROR("Pipe msg failure"); + close(pipe_fds[0]); + close(pipe_fds[1]); + return NULL; + } + + child_pid = fork(); + if (child_pid < 0) + goto on_error; + + if (child_pid == 0) { + close(pipe_msg[1]); + if (pipe_msg[0] != STDIN_FILENO) + dup2(pipe_msg[0], STDIN_FILENO); + else { + if (fcntl(pipe_msg[0], F_SETFD, 0) != 0) { + fprintf(stderr, "Failed to remove FD_CLOEXEC from fd."); + exit(127); + } + } + close(pipe_msg[0]); + + close(pipe_fds[0]); + + /* duplicate stdout */ + if (pipe_fds[1] != STDOUT_FILENO) + ret = dup2(pipe_fds[1], STDOUT_FILENO); + else + ret = fcntl(pipe_fds[1], F_SETFD, 0); + if (ret < 0) { + close(pipe_fds[1]); + _exit(EXIT_FAILURE); + } + + /* duplicate stderr */ + if (pipe_fds[1] != STDERR_FILENO) + ret = dup2(pipe_fds[1], STDERR_FILENO); + else + ret = fcntl(pipe_fds[1], F_SETFD, 0); + close(pipe_fds[1]); + if (ret < 0) + _exit(EXIT_FAILURE); + + if (lxc_check_inherited(NULL, true, NULL, 0) != 0) { + fprintf(stderr, "check inherited fd failed"); + exit(127); + } + + /* + * Unblock signals. + * This is the main/only reason + * why we do our lousy popen() emulation. 
+ */ + { + sigset_t mask; + sigfillset(&mask); + sigprocmask(SIG_UNBLOCK, &mask, NULL); + } + + if (env_len > 0) + execvpe(commandpath, args, envs); + else + execvp(commandpath, args); + fprintf(stderr, "fork/exec %s: %s", commandpath, strerror(errno)); + exit(127); + } + + /* parent */ + + close(pipe_fds[1]); + pipe_fds[1] = -1; + + close(pipe_msg[0]); + pipe_msg[0]= -1; + if (instr) { + size_t len = strlen(instr); + if (lxc_write_nointr(pipe_msg[1], instr, len) != len) { + WARN("Write instr: %s failed", instr); + } + } + close(pipe_msg[1]); + pipe_msg[1]= -1; + + fp = calloc(1, sizeof(*fp)); + if (!fp) { + ERROR("Failed to allocate memory"); + goto on_error; + } + + fp->child_pid = child_pid; + fp->pipe = pipe_fds[0]; + + return fp; + +on_error: + + if (pipe_fds[0] >= 0) + close(pipe_fds[0]); + + if (pipe_fds[1] >= 0) + close(pipe_fds[1]); + + if (pipe_msg[0] >= 0) + close(pipe_msg[0]); + + if (pipe_msg[1] >= 0) + close(pipe_msg[1]); + + if (fp) + free(fp); + + return NULL; +} + +void* wait_ocihook_timeout(void *arg) +{ + bool alive = false; + struct wait_conf *conf = (struct wait_conf *)arg; + + if (!conf || conf->timeout < 1) + goto out; + + sleep(conf->timeout); + + alive = lxc_process_alive(conf->pid, conf->startat); + + if (alive) { + ERROR("%s:%d: running %s hook caused \"hook ran past specified timeout of %.1fs\"", + __FILE__, __LINE__, (conf->which >= NUM_LXC_HOOKS) ? "invalid type" : lxchook_names[conf->which], + (double)conf->timeout); + + lxc_write_error_message(conf->errfd, "%s:%d: running %s hook caused \"hook ran past specified timeout of %.1fs\".", + __FILE__, __LINE__, (conf->which >= NUM_LXC_HOOKS) ? "invalid type" : lxchook_names[conf->which], + (double)conf->timeout); + + if (kill(conf->pid, SIGKILL) && errno != ESRCH) { + ERROR("Send kill signal failed"); + goto out; + } + } + +out: + free(conf); + return ((void *)0); +} + +static int run_ocihook_buffer(struct oci_hook_conf *oconf, const char *inmsg) +{ + struct lxc_popen_FILE *f; + char output[LXC_LOG_BUFFER_SIZE] = {0}; + int ret; + pthread_t ptid; + int err; + struct wait_conf *conf = 
NULL; + pthread_attr_t attr; + char *buffer = oconf->ocihook->path; + char *err_args_msg = NULL; + char *err_envs_msg = NULL; + char **hookenvs = NULL; + size_t hookenvs_len = 0; + + hookenvs = merge_ocihook_env(oconf->ocihook->env, oconf->ocihook->env_len, &hookenvs_len); + if (!hookenvs) { + ERROR("Out of memory."); + return -1; + } + + f = lxc_popen_ocihook(buffer, oconf->ocihook->args, oconf->ocihook->args_len, hookenvs, hookenvs_len, inmsg); + lxc_free_array((void **)hookenvs, free); + if (!f) { + SYSERROR("Failed to popen() %s.", buffer); + return -1; + } + + conf = malloc(sizeof(struct wait_conf)); + if (conf == NULL) { + SYSERROR("Failed to malloc."); + goto on_error; + } + + memset(conf, 0x00, sizeof(struct wait_conf)); + + conf->pid = f->child_pid; + conf->startat = lxc_get_process_startat(conf->pid); + + INFO("hook_conf timeout %d", oconf->ocihook->timeout); + if(oconf->ocihook->timeout > 0) + conf->timeout = oconf->ocihook->timeout; + else { + conf->timeout = 30; + INFO("Set hook timeout 30s"); + } + conf->errfd = oconf->errfd; + conf->which = oconf->which; + + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + err = pthread_create(&ptid, &attr, wait_ocihook_timeout, conf); + if (err != 0) { + ERROR("Create wait timeout thread failed"); + free(conf); + goto on_error; + } + + ret = lxc_wait_for_pid_status(f->child_pid); + + lxc_read_nointr(f->pipe, output, sizeof(output) - 1); + close(f->pipe); + free(f); + + if (ret == -1) { + SYSERROR("Script exited with error."); + goto print_hook; + } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) { + ERROR("Script exited with status %d. output: %s", WEXITSTATUS(ret), output); + lxc_write_error_message(oconf->errfd, "%s:%d: running %s hook caused \"error running hook: exit status %d, output: %s\".", + __FILE__, __LINE__, + (oconf->which >= NUM_LXC_HOOKS) ? 
"invalid type" : lxchook_names[oconf->which], + WEXITSTATUS(ret), output); + + goto print_hook; + } else if (WIFSIGNALED(ret)) { + ERROR("Script terminated by signal %d.", WTERMSIG(ret)); + lxc_write_error_message(oconf->errfd, "%s:%d: running %s hook caused \"error running hook: Script terminated by signal %d\".", + __FILE__, __LINE__, + (oconf->which >= NUM_LXC_HOOKS) ? "invalid type" : lxchook_names[oconf->which], + WTERMSIG(ret)); + + goto print_hook; + } + + return 0; + +on_error: + if (f) { + if (f->pipe >= 0) + close(f->pipe); + free(f); + } + +print_hook: + if (oconf->ocihook->args) + err_args_msg = lxc_string_join(" ", (const char **)oconf->ocihook->args, false); + if (oconf->ocihook->env) + err_envs_msg = lxc_string_join(" ", (const char **)oconf->ocihook->env, false); + ERROR("Hook script command: \"%s\", args: \"%s\", envs: \"%s\", timeout: %d.", + buffer, err_args_msg ? err_args_msg : "", + err_envs_msg ? err_envs_msg : "", oconf->ocihook->timeout); + + free(err_args_msg); + free(err_envs_msg); + return -1; +} + +static int run_ocihook_script_argv(const char *name, const char *section, + struct oci_hook_conf *oconf, + const char *lxcpath, const char *rootfs) +{ + int ret; + const char *script = oconf->ocihook->path; + char *inmsg = NULL; + + INFO("Executing script \"%s\" for container \"%s\", config section \"%s\".", + script, name, section); + + inmsg = generate_json_str(name, lxcpath, rootfs); + if (!inmsg) { + return -1; + } + + ret = run_ocihook_buffer(oconf, inmsg); + free(inmsg); + inmsg = NULL; + return ret; +} + +static char *get_root_path(const char *path, const char *backend) +{ + char *ret = NULL; + char *tmp = NULL; + + if (!path) { + ret = safe_strdup("/"); + return ret; + } + if (!backend) { + goto default_out; + } + + if (strcmp(backend, "aufs") == 0 || + strcmp(backend, "overlayfs") == 0 || + strcmp(backend, "loop") == 0) { + tmp = strrchr(path, ':'); + if (tmp == NULL) { + ERROR("Invalid root path format"); + return NULL; + } + tmp++; 
+ ret = safe_strdup(tmp); + return ret; + } + +default_out: + ret = safe_strdup(path); + return ret; +} + +static int do_run_oci_hooks(const char *name, const char *lxcpath, struct lxc_conf *lc, int which, int errfd) +{ + struct oci_hook_conf work_conf = {0}; + size_t i; + int ret = 0; + int nret = 0; + char *rootpath = NULL; + + if (!lc) { + return -1; + } + if (!lc->ocihooks) { + return 0; + } + + rootpath = get_root_path(lc->rootfs.path, lc->rootfs.bdev_type); + if (!rootpath) { + ERROR("Get container %s rootpath failed.", name); + return -1; + } + + work_conf.errfd = errfd; + work_conf.which = which; + switch (which) { + case OCI_HOOK_PRESTART: + for (i = 0; i < lc->ocihooks->prestart_len; i++) { + work_conf.ocihook = lc->ocihooks->prestart[i]; + ret = run_ocihook_script_argv(name, "lxc", &work_conf, lxcpath, rootpath); + if (ret != 0) + break; + } + break; + case OCI_HOOK_POSTSTART: + for (i = 0; i < lc->ocihooks->poststart_len; i++) { + work_conf.ocihook = lc->ocihooks->poststart[i]; + nret = run_ocihook_script_argv(name, "lxc", &work_conf, lxcpath, rootpath); + if (nret != 0) + WARN("running poststart hook %zu failed, ContainerId: %s", i, name); + } + break; + case OCI_HOOK_POSTSTOP: + for (i = 0; i < lc->ocihooks->poststop_len; i++) { + work_conf.ocihook = lc->ocihooks->poststop[i]; + nret = run_ocihook_script_argv(name, "lxc", &work_conf, lxcpath, rootpath); + if (nret != 0) + WARN("running poststop hook %zu failed, ContainerId: %s", i, name); + } + break; + default: + ret = -1; + } + if (rootpath) + free(rootpath); + return ret; +} + +int run_oci_hooks(const char *name, const char *hookname, struct lxc_conf *conf, const char *lxcpath) +{ + int which = -1; + + if (strcmp(hookname, "oci-prestart") == 0) { + which = OCI_HOOK_PRESTART; + if (!lxcpath) { + ERROR("oci hook require lxcpath"); + return -1; + } + return do_run_oci_hooks(name, lxcpath, conf, which, conf->errpipe[1]); + } else if (strcmp(hookname, "oci-poststart") == 0) { + which = 
OCI_HOOK_POSTSTART; + if (!lxcpath) { + ERROR("oci hook require lxcpath"); + return -1; + } + return do_run_oci_hooks(name, lxcpath, conf, which, conf->errpipe[1]); + } else if (strcmp(hookname, "oci-poststop") == 0) { + which = OCI_HOOK_POSTSTOP; + if (!lxcpath) { + ERROR("oci hook require lxcpath"); + return -1; + } + return do_run_oci_hooks(name, lxcpath, conf, which, conf->errpipe[1]); + } else + return -1; + + return 0; +} +#endif + +int lxc_setup(struct lxc_handler *handler) +{ + int ret; + const char *lxcpath = handler->lxcpath, *name = handler->name; + struct lxc_conf *lxc_conf = handler->conf; + char *keyring_context = NULL; +#ifdef HAVE_ISULAD + bool setup_dev = true; +#endif + + ret = lxc_setup_rootfs_prepare_root(lxc_conf, name, lxcpath); +#ifdef HAVE_ISULAD + if (ret < 0) { + lxc_write_error_message(lxc_conf->errpipe[1], "%s:%d: failed to setup rootfs %s.", + __FILE__, __LINE__, lxc_conf->rootfs.path); + return log_error(-1, "Failed to setup rootfs"); + } +#else + if (ret < 0) + return log_error(-1, "Failed to setup rootfs"); +#endif + + if (handler->nsfd[LXC_NS_UTS] == -EBADF) { + ret = setup_utsname(lxc_conf->utsname); + if (ret < 0) + return log_error(-1, "Failed to setup the utsname %s", name); + } + + if (!lxc_conf->keyring_disable_session) { + if (lxc_conf->lsm_se_keyring_context) { + keyring_context = lxc_conf->lsm_se_keyring_context; + } else if (lxc_conf->lsm_se_context) { + keyring_context = lxc_conf->lsm_se_context; + } + + ret = lxc_setup_keyring(keyring_context); + if (ret < 0) + return -1; + } + + if (handler->ns_clone_flags & CLONE_NEWNET) { + ret = lxc_setup_network_in_child_namespaces(lxc_conf, + &lxc_conf->network); + if (ret < 0) + return log_error(-1, "Failed to setup network"); + + ret = lxc_network_send_name_and_ifindex_to_parent(handler); + if (ret < 0) + return log_error(-1, "Failed to send network device names and ifindices to parent"); + } if (lxc_conf->autodev > 0) { +#ifdef HAVE_ISULAD + ret = mount_autodev(name, 
&lxc_conf->rootfs, lxc_conf->autodevtmpfssize, lxcpath, lxc_conf->systemd); +#else ret = mount_autodev(name, &lxc_conf->rootfs, lxc_conf->autodevtmpfssize, lxcpath); +#endif if (ret < 0) return log_error(-1, "Failed to mount \"/dev\""); } @@ -3254,6 +4785,9 @@ int lxc_setup(struct lxc_handler *handler) &lxc_conf->mount_list, name, lxcpath); if (ret < 0) return log_error(-1, "Failed to setup mount entries"); +#ifdef HAVE_ISULAD + setup_dev = need_setup_dev(lxc_conf, &lxc_conf->mount_list); +#endif } if (lxc_conf->is_execute) { @@ -3300,16 +4834,21 @@ int lxc_setup(struct lxc_handler *handler) return log_error(-1, "Failed to populate \"/dev\""); } +#ifdef HAVE_ISULAD + /* isulad: setup devices which will be populated in the container. */ + if (!lxc_list_empty(&lxc_conf->populate_devs) && setup_dev) { + if (setup_populate_devs(&lxc_conf->rootfs, &lxc_conf->populate_devs) != 0) { + return log_error(-1, "Failed to setup devices in the container"); + } + } +#endif + /* Make sure any start hooks are in the container */ if (!verify_start_hooks(lxc_conf)) return log_error(-1, "Failed to verify start hooks"); - ret = lxc_create_tmp_proc_mount(lxc_conf); - if (ret < 0) - return log_error(-1, "Failed to \"/proc\" LSMs"); - ret = lxc_setup_console(&lxc_conf->rootfs, &lxc_conf->console, - lxc_conf->ttys.dir, pts_mnt_fd); + lxc_conf->ttys.dir); if (ret < 0) return log_error(-1, "Failed to setup console"); @@ -3317,50 +4856,201 @@ int lxc_setup(struct lxc_handler *handler) if (ret < 0) return log_error(-1, "Failed to setup \"/dev\" symlinks"); + ret = lxc_create_tmp_proc_mount(lxc_conf); + if (ret < 0) + return log_error(-1, "Failed to \"/proc\" LSMs"); + +#ifdef HAVE_ISULAD + /* Ask father to run oci prestart hooks and wait for him to finish. 
*/ + if (lxc_sync_barrier_parent(handler, LXC_SYNC_OCI_PRESTART_HOOK)) { + return log_error(-1, "Failed to sync parent to start host hook"); + } +#endif ret = lxc_setup_rootfs_switch_root(&lxc_conf->rootfs); if (ret < 0) return log_error(-1, "Failed to pivot root into rootfs"); - /* Setting the boot-id is best-effort for now. */ - if (lxc_conf->autodev > 0) - (void)lxc_setup_boot_id(); + /* Setting the boot-id is best-effort for now. */ + if (lxc_conf->autodev > 0) + (void)lxc_setup_boot_id(); + +#ifdef HAVE_ISULAD + if (setup_rootfs_mountopts(&lxc_conf->rootfs)) { + return log_error(-1, "failed to set rootfs for '%s'", name); + } + if (lxc_conf->rootfs.path != NULL && setup_dev) { + ret = lxc_setup_devpts(lxc_conf); + if (ret < 0) { + return log_error(-1, "Failed to setup new devpts instance for '%s'", name); + } + } +#else + ret = lxc_setup_devpts(lxc_conf); + if (ret < 0) + return log_error(-1, "Failed to setup new devpts instance"); +#endif + + ret = lxc_create_ttys(handler); + if (ret < 0) + return -1; + +#ifdef HAVE_ISULAD + /*isulad: set system umask */ + umask(lxc_conf->umask); +#endif + + ret = setup_personality(lxc_conf->personality); + if (ret < 0) + return log_error(-1, "Failed to set personality"); + + /* Set sysctl value to a path under /proc/sys as determined from the + * key. For e.g. net.ipv4.ip_forward translated to + * /proc/sys/net/ipv4/ip_forward. 
+ */ + if (!lxc_list_empty(&lxc_conf->sysctls)) { + ret = setup_sysctl_parameters(&lxc_conf->sysctls); + if (ret < 0) + return log_error(-1, "Failed to setup sysctl parameters"); + } + +#ifdef HAVE_ISULAD + // isulad: setup rootfs masked paths + if (!lxc_list_empty(&lxc_conf->rootfs.maskedpaths)) { + if (setup_rootfs_maskedpaths(&lxc_conf->rootfs.maskedpaths)) { + return log_error(-1, "failed to setup maskedpaths"); + } + } + + // isulad: setup rootfs ro paths + if (!lxc_list_empty(&lxc_conf->rootfs.ropaths)) { + if (setup_rootfs_ropaths(&lxc_conf->rootfs.ropaths)) { + return log_error(-1, "failed to setup readonlypaths"); + } + } + + //isulad: system container, remount /proc/sys/xxx by mount_list + if (lxc_conf->systemd != NULL && strcmp(lxc_conf->systemd, "true") == 0) { + if (!lxc_list_empty(&lxc_conf->mount_list)) { + if (remount_proc_sys_mount_entries(&lxc_conf->mount_list, + lxc_conf->lsm_aa_allow_nesting)) { + return log_error(-1, "failed to remount /proc/sys"); + } + } + } + + // isulad: create link /etc/mtab for /proc/mounts + if (create_mtab_link() != 0) { + return log_error(-1, "failed to create link /etc/mtab for target /proc/mounts"); + } +#endif + + if (!lxc_list_empty(&lxc_conf->keepcaps)) { + if (!lxc_list_empty(&lxc_conf->caps)) + return log_error(-1, "Container requests lxc.cap.drop and lxc.cap.keep: either use lxc.cap.drop or lxc.cap.keep, not both"); + + if (dropcaps_except(&lxc_conf->keepcaps)) + return log_error(-1, "Failed to keep capabilities"); + } else if (setup_caps(&lxc_conf->caps)) { + return log_error(-1, "Failed to drop capabilities"); + } + + NOTICE("The container \"%s\" is set up", name); + + return 0; +} + +#ifdef HAVE_ISULAD +/* isulad drop caps for container*/ +int lxc_drop_caps(struct lxc_conf *conf) +{ +#define __DEF_CAP_TO_MASK(x) (1U << ((x) & 31)) +#if HAVE_LIBCAP + int ret = 0; + struct lxc_list *iterator = NULL; + char *keep_entry = NULL; + size_t i = 0; + int capid; + size_t numcaps = (size_t)lxc_caps_last_cap() + 1; + 
struct lxc_list *caps = NULL; + int *caplist = NULL; + + if (lxc_list_empty(&conf->keepcaps)) + return 0; + + caps = &conf->keepcaps; + + if (numcaps <= 0 || numcaps > 200) + return -1; + + // caplist[i] is 1 if we keep capability i + caplist = malloc(numcaps * sizeof(int)); + if (caplist == NULL) { + ERROR("Out of memory"); + return -1; + } + (void)memset(caplist, 0, numcaps * sizeof(int)); + + lxc_list_for_each(iterator, caps) { + + keep_entry = iterator->elem; + /* isulad: Do not keep any cap*/ + if (strcmp(keep_entry, "ISULAD_KEEP_NONE") == 0) { + DEBUG("Do not keep any capability"); + for(i = 0; i < numcaps; i++) { + caplist[i] = 0; + } + break; + } + + capid = parse_cap(keep_entry); + + if (capid == -2) + continue; + + if (capid < 0 || (size_t)capid >= numcaps) { + ERROR("unknown capability %s", keep_entry); + ret = -1; + goto out; + } + + DEBUG("keep capability '%s' (%d)", keep_entry, capid); + + caplist[capid] = 1; + } + + struct __user_cap_header_struct cap_header_data; + struct __user_cap_data_struct cap_data_data[2]; - ret = lxc_setup_devpts(lxc_conf); - if (ret < 0) - return log_error(-1, "Failed to setup new devpts instance"); + cap_user_header_t cap_header = &cap_header_data; + cap_user_data_t cap_data = &cap_data_data[0]; - ret = lxc_create_ttys(handler); - if (ret < 0) - return -1; + memset(cap_header, 0,sizeof(struct __user_cap_header_struct)); + memset(cap_data, 0, sizeof(struct __user_cap_data_struct) * 2); - ret = setup_personality(lxc_conf->personality); - if (ret < 0) - return log_error(-1, "Failed to set personality"); + cap_header->pid = 0; + cap_header->version = _LINUX_CAPABILITY_VERSION_3; - /* Set sysctl value to a path under /proc/sys as determined from the - * key. For e.g. net.ipv4.ip_forward translated to - * /proc/sys/net/ipv4/ip_forward. 
- */ - if (!lxc_list_empty(&lxc_conf->sysctls)) { - ret = setup_sysctl_parameters(&lxc_conf->sysctls); - if (ret < 0) - return log_error(-1, "Failed to setup sysctl parameters"); + for (i = 0; i < numcaps; i++) { + if (caplist[i]) { + cap_data[CAP_TO_INDEX(i)].effective = cap_data[CAP_TO_INDEX(i)].effective | (i > 31 ? __DEF_CAP_TO_MASK(i % 32) : __DEF_CAP_TO_MASK(i)); + cap_data[CAP_TO_INDEX(i)].permitted = cap_data[CAP_TO_INDEX(i)].permitted | (i > 31 ? __DEF_CAP_TO_MASK(i % 32) : __DEF_CAP_TO_MASK(i)); + cap_data[CAP_TO_INDEX(i)].inheritable = cap_data[CAP_TO_INDEX(i)].inheritable | (i > 31 ? __DEF_CAP_TO_MASK(i % 32) : __DEF_CAP_TO_MASK(i)); + } } - if (!lxc_list_empty(&lxc_conf->keepcaps)) { - if (!lxc_list_empty(&lxc_conf->caps)) - return log_error(-1, "Container requests lxc.cap.drop and lxc.cap.keep: either use lxc.cap.drop or lxc.cap.keep, not both"); - - if (dropcaps_except(&lxc_conf->keepcaps)) - return log_error(-1, "Failed to keep capabilities"); - } else if (setup_caps(&lxc_conf->caps)) { - return log_error(-1, "Failed to drop capabilities"); + if (capset(cap_header, cap_data)) { + SYSERROR("Failed to set capabilitys"); + ret = -1; + goto out; } - NOTICE("The container \"%s\" is set up", name); +#endif - return 0; +out: + free(caplist); + return ret; } +#endif int run_lxc_hooks(const char *name, char *hookname, struct lxc_conf *conf, char *argv[]) @@ -3406,7 +5096,7 @@ static int lxc_free_idmap(struct lxc_list *id_map) { struct lxc_list *it, *next; - lxc_list_for_each_safe(it, id_map, next) { + lxc_list_for_each_safe (it, id_map, next) { lxc_list_del(it); free(it->elem); free(it); @@ -3753,6 +5443,23 @@ void lxc_conf_free(struct lxc_conf *conf) free(conf->cgroup_meta.controllers); free(conf->shmount.path_host); free(conf->shmount.path_cont); +#ifdef HAVE_ISULAD + free(conf->container_info_file); + if (conf->exit_fd != -1) { + close(conf->exit_fd); + } + free(conf->systemd); + lxc_clear_init_args(conf); + lxc_clear_init_groups(conf); + 
lxc_clear_populate_devices(conf); + lxc_clear_rootfs_masked_paths(conf); + lxc_clear_rootfs_ro_paths(conf); + free(conf->errmsg); + lxc_close_error_pipe(conf->errpipe); + if (conf->ocihooks) { + free_oci_runtime_spec_hooks(conf->ocihooks); + } +#endif free(conf); } @@ -3842,19 +5549,18 @@ static struct id_map *mapped_hostid_add(const struct lxc_conf *conf, uid_t id, /* Reuse existing mapping. */ tmp = find_mapped_hostid_entry(conf, id, type); - if (tmp) { - memcpy(entry, tmp, sizeof(*entry)); - } else { - /* Find new mapping. */ - hostid_mapped = find_unmapped_nsid(conf, type); - if (hostid_mapped < 0) - return log_debug(NULL, "Failed to find free mapping for id %d", id); + if (tmp) + return memcpy(entry, tmp, sizeof(*entry)); - entry->idtype = type; - entry->nsid = hostid_mapped; - entry->hostid = (unsigned long)id; - entry->range = 1; - } + /* Find new mapping. */ + hostid_mapped = find_unmapped_nsid(conf, type); + if (hostid_mapped < 0) + return log_debug(NULL, "Failed to find free mapping for id %d", id); + + entry->idtype = type; + entry->nsid = hostid_mapped; + entry->hostid = (unsigned long)id; + entry->range = 1; return move_ptr(entry); } @@ -3878,7 +5584,7 @@ static struct lxc_list *get_minimal_idmap(const struct lxc_conf *conf, euid = geteuid(); if (euid >= container_root_uid->hostid && euid < (container_root_uid->hostid + container_root_uid->range)) - host_uid_map = move_ptr(container_root_uid); + host_uid_map = container_root_uid; container_root_gid = mapped_nsid_add(conf, nsgid, ID_TYPE_GID); if (!container_root_gid) @@ -3886,7 +5592,7 @@ static struct lxc_list *get_minimal_idmap(const struct lxc_conf *conf, egid = getegid(); if (egid >= container_root_gid->hostid && egid < (container_root_gid->hostid + container_root_gid->range)) - host_gid_map = move_ptr(container_root_gid); + host_gid_map = container_root_gid; /* Check whether the {g,u}id of the user has a mapping. 
*/ if (!host_uid_map) @@ -3909,35 +5615,45 @@ static struct lxc_list *get_minimal_idmap(const struct lxc_conf *conf, tmplist = malloc(sizeof(*tmplist)); if (!tmplist) return NULL; - /* idmap will now keep track of that memory. */ - lxc_list_add_elem(tmplist, move_ptr(host_uid_map)); + lxc_list_add_elem(tmplist, container_root_uid); lxc_list_add_tail(idmap, tmplist); - if (container_root_uid) { + if (host_uid_map && (host_uid_map != container_root_uid)) { + /* idmap will now keep track of that memory. */ + move_ptr(container_root_uid); + /* Add container root to the map. */ tmplist = malloc(sizeof(*tmplist)); if (!tmplist) return NULL; - /* idmap will now keep track of that memory. */ - lxc_list_add_elem(tmplist, move_ptr(container_root_uid)); + lxc_list_add_elem(tmplist, host_uid_map); lxc_list_add_tail(idmap, tmplist); } + /* idmap will now keep track of that memory. */ + move_ptr(container_root_uid); + /* idmap will now keep track of that memory. */ + move_ptr(host_uid_map); tmplist = malloc(sizeof(*tmplist)); if (!tmplist) return NULL; - /* idmap will now keep track of that memory. */ - lxc_list_add_elem(tmplist, move_ptr(host_gid_map)); + lxc_list_add_elem(tmplist, container_root_gid); lxc_list_add_tail(idmap, tmplist); - if (container_root_gid) { + if (host_gid_map && (host_gid_map != container_root_gid)) { + /* idmap will now keep track of that memory. */ + move_ptr(container_root_gid); + tmplist = malloc(sizeof(*tmplist)); if (!tmplist) return NULL; - /* idmap will now keep track of that memory. */ - lxc_list_add_elem(tmplist, move_ptr(container_root_gid)); + lxc_list_add_elem(tmplist, host_gid_map); lxc_list_add_tail(idmap, tmplist); } + /* idmap will now keep track of that memory. */ + move_ptr(container_root_gid); + /* idmap will now keep track of that memory. 
*/ + move_ptr(host_gid_map); TRACE("Allocated minimal idmapping for ns uid %d and ns gid %d", nsuid, nsgid); @@ -3966,13 +5682,9 @@ int userns_exec_1(const struct lxc_conf *conf, int (*fn)(void *), void *data, call_cleaner(lxc_free_idmap) struct lxc_list *idmap = NULL; int ret = -1, status = -1; char c = '1'; - struct userns_fn_data d = { - .arg = data, - .fn = fn, - .fn_name = fn_name, - }; pid_t pid; int pipe_fds[2]; + struct userns_fn_data d; if (!conf) return -EINVAL; @@ -3985,6 +5697,9 @@ int userns_exec_1(const struct lxc_conf *conf, int (*fn)(void *), void *data, if (ret < 0) return -errno; + d.fn = fn; + d.fn_name = fn_name; + d.arg = data; d.p[0] = pipe_fds[0]; d.p[1] = pipe_fds[1]; @@ -4344,226 +6059,6 @@ on_error: return ret; } -static int add_idmap_entry(struct lxc_list *idmap, enum idtype idtype, - unsigned long nsid, unsigned long hostid, - unsigned long range) -{ - __do_free struct id_map *new_idmap = NULL; - __do_free struct lxc_list *new_list = NULL; - - new_idmap = zalloc(sizeof(*new_idmap)); - if (!new_idmap) - return ret_errno(ENOMEM); - - new_idmap->idtype = idtype; - new_idmap->hostid = hostid; - new_idmap->nsid = nsid; - new_idmap->range = range; - - new_list = zalloc(sizeof(*new_list)); - if (!new_list) - return ret_errno(ENOMEM); - - new_list->elem = move_ptr(new_idmap); - lxc_list_add_tail(idmap, move_ptr(new_list)); - - INFO("Adding id map: type %c nsid %lu hostid %lu range %lu", - idtype == ID_TYPE_UID ? 
'u' : 'g', nsid, hostid, range); - return 0; -} - -int userns_exec_mapped_root(const char *path, int path_fd, - const struct lxc_conf *conf) -{ - call_cleaner(lxc_free_idmap) struct lxc_list *idmap = NULL; - __do_close int fd = -EBADF; - int target_fd = -EBADF; - char c = '1'; - ssize_t ret; - pid_t pid; - int sock_fds[2]; - uid_t container_host_uid, hostuid; - gid_t container_host_gid, hostgid; - struct stat st; - - if (!conf || (!path && path_fd < 0)) - return ret_errno(EINVAL); - - if (!path) - path = "(null)"; - - container_host_uid = get_mapped_rootid(conf, ID_TYPE_UID); - if (!uid_valid(container_host_uid)) - return log_error(-1, "No uid mapping for container root"); - - container_host_gid = get_mapped_rootid(conf, ID_TYPE_GID); - if (!gid_valid(container_host_gid)) - return log_error(-1, "No gid mapping for container root"); - - if (path_fd < 0) { - fd = open(path, O_CLOEXEC | O_NOCTTY); - if (fd < 0) - return log_error_errno(-errno, errno, "Failed to open \"%s\"", path); - target_fd = fd; - } else { - target_fd = path_fd; - } - - hostuid = geteuid(); - /* We are root so chown directly. */ - if (hostuid == 0) { - ret = fchown(target_fd, container_host_uid, container_host_gid); - if (ret) - return log_error_errno(-errno, errno, - "Failed to fchown(%d(%s), %d, %d)", - target_fd, path, container_host_uid, - container_host_gid); - return log_trace(0, "Chowned %d(%s) to uid %d and %d", target_fd, path, - container_host_uid, container_host_gid); - } - - /* The container's root host id matches */ - if (container_host_uid == hostuid) - return log_info(0, "Container root id is mapped to our uid"); - - /* Get the current ids of our target. 
*/ - ret = fstat(target_fd, &st); - if (ret) - return log_error_errno(-errno, errno, "Failed to stat \"%s\"", path); - - hostgid = getegid(); - if (st.st_uid == hostuid && mapped_hostid(st.st_gid, conf, ID_TYPE_GID) < 0) { - ret = fchown(target_fd, -1, hostgid); - if (ret) - return log_error_errno(-errno, errno, - "Failed to fchown(%d(%s), -1, %d)", - target_fd, path, hostgid); - TRACE("Chowned %d(%s) to -1:%d", target_fd, path, hostgid); - } - - idmap = malloc(sizeof(*idmap)); - if (!idmap) - return -ENOMEM; - lxc_list_init(idmap); - - /* "u:0:rootuid:1" */ - ret = add_idmap_entry(idmap, ID_TYPE_UID, 0, container_host_uid, 1); - if (ret < 0) - return log_error_errno(ret, -ret, "Failed to add idmap entry"); - - /* "u:hostuid:hostuid:1" */ - ret = add_idmap_entry(idmap, ID_TYPE_UID, hostuid, hostuid, 1); - if (ret < 0) - return log_error_errno(ret, -ret, "Failed to add idmap entry"); - - /* "g:0:rootgid:1" */ - ret = add_idmap_entry(idmap, ID_TYPE_GID, 0, container_host_gid, 1); - if (ret < 0) - return log_error_errno(ret, -ret, "Failed to add idmap entry"); - - /* "g:hostgid:hostgid:1" */ - ret = add_idmap_entry(idmap, ID_TYPE_GID, hostgid, hostgid, 1); - if (ret < 0) - return log_error_errno(ret, -ret, "Failed to add idmap entry"); - - if (hostgid != st.st_gid) { - /* "g:pathgid:rootgid+pathgid:1" */ - ret = add_idmap_entry(idmap, ID_TYPE_GID, st.st_gid, - container_host_gid + (gid_t)st.st_gid, 1); - if (ret < 0) - return log_error_errno(ret, -ret, "Failed to add idmap entry"); - } - - ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, sock_fds); - if (ret < 0) - return -errno; - - pid = fork(); - if (pid < 0) { - SYSERROR("Failed to create new process"); - goto on_error; - } - - if (pid == 0) { - close_prot_errno_disarm(sock_fds[1]); - - ret = unshare(CLONE_NEWUSER); - if (ret < 0) { - SYSERROR("Failed to unshare new user namespace"); - _exit(EXIT_FAILURE); - } - - ret = lxc_write_nointr(sock_fds[0], &c, 1); - if (ret != 1) - _exit(EXIT_FAILURE); - - ret = 
lxc_read_nointr(sock_fds[0], &c, 1); - if (ret != 1) - _exit(EXIT_FAILURE); - - close_prot_errno_disarm(sock_fds[0]); - - if (!lxc_switch_uid_gid(0, 0)) - _exit(EXIT_FAILURE); - - if (!lxc_setgroups(0, NULL)) - _exit(EXIT_FAILURE); - - ret = fchown(target_fd, 0, st.st_gid); - if (ret) { - SYSERROR("Failed to chown %d(%s) to -1:%d", target_fd, path, st.st_gid); - _exit(EXIT_FAILURE); - } - - TRACE("Chowned %d(%s) to 0:%d", target_fd, path, st.st_gid); - _exit(EXIT_SUCCESS); - } - - close_prot_errno_disarm(sock_fds[0]); - - if (lxc_log_get_level() == LXC_LOG_LEVEL_TRACE || - conf->loglevel == LXC_LOG_LEVEL_TRACE) { - struct id_map *map; - struct lxc_list *it; - - lxc_list_for_each(it, idmap) { - map = it->elem; - TRACE("Establishing %cid mapping for \"%d\" in new user namespace: nsuid %lu - hostid %lu - range %lu", - (map->idtype == ID_TYPE_UID) ? 'u' : 'g', pid, map->nsid, map->hostid, map->range); - } - } - - ret = lxc_read_nointr(sock_fds[1], &c, 1); - if (ret != 1) { - SYSERROR("Failed waiting for child process %d\" to tell us to proceed", pid); - goto on_error; - } - - /* Set up {g,u}id mapping for user namespace of child process. */ - ret = lxc_map_ids(idmap, pid); - if (ret < 0) { - ERROR("Error setting up {g,u}id mappings for child process \"%d\"", pid); - goto on_error; - } - - /* Tell child to proceed. */ - ret = lxc_write_nointr(sock_fds[1], &c, 1); - if (ret != 1) { - SYSERROR("Failed telling child process \"%d\" to proceed", pid); - goto on_error; - } - -on_error: - close_prot_errno_disarm(sock_fds[0]); - close_prot_errno_disarm(sock_fds[1]); - - /* Wait for child to finish. 
/*
 * isulad: close both ends of an error pipe.
 *
 * @errpipe: array of two file descriptors. Each entry that is non-negative
 *           is passed to close() and then overwritten with -1; negative
 *           entries are left exactly as they are. The result of close() is
 *           not inspected.
 */
void lxc_close_error_pipe(int *errpipe)
{
	for (int end = 0; end < 2; end++) {
		if (errpipe[end] < 0)
			continue;

		close(errpipe[end]);
		errpipe[end] = -1;
	}
}
a/src/lxc/conf.h b/src/lxc/conf.h index b72afbaa5..4b6409e3e 100644 --- a/src/lxc/conf.h +++ b/src/lxc/conf.h @@ -23,6 +23,10 @@ #include "start.h" #include "terminal.h" +#ifdef HAVE_ISULAD +#include "oci_runtime_hooks.h" +#endif + #if HAVE_SYS_RESOURCE_H #include #endif @@ -60,9 +64,6 @@ struct lxc_cgroup { struct /* meta */ { char *controllers; char *dir; - char *monitor_dir; - char *container_dir; - char *namespace_dir; bool relative; }; }; @@ -146,6 +147,8 @@ struct lxc_tty_info { * @mountflags : the portion of @options that are flags * @data : the portion of @options that are not flags * @managed : whether it is managed by LXC + * @maskedpaths: A list of paths to be msked over inside the container + * @ropaths : A list of paths to be remounted with readonly inside the container */ struct lxc_rootfs { char *path; @@ -155,6 +158,16 @@ struct lxc_rootfs { unsigned long mountflags; char *data; bool managed; + +#ifdef HAVE_ISULAD + /* isulad: maskedpaths */ + struct lxc_list maskedpaths; + /* isulad: ropaths */ + struct lxc_list ropaths; + /* isulad: errfd */ + int errfd; +#endif + }; /* @@ -203,6 +216,11 @@ enum lxchooks { LXCHOOK_CLONE, LXCHOOK_DESTROY, LXCHOOK_START_HOST, +#ifdef HAVE_ISULAD + OCI_HOOK_PRESTART, + OCI_HOOK_POSTSTART, + OCI_HOOK_POSTSTOP, +#endif NUM_LXC_HOOKS }; @@ -233,6 +251,27 @@ struct device_item { int global_rule; }; +#ifdef HAVE_ISULAD +/* + * iSulad: Defines a structure to store the devices which will + * be attached in container + * @name : the target device name in container + * @type : the type of target device "c" or "b" + * @mode : file mode for the device + * @maj : major number for the device + * @min : minor number for the device + */ +struct lxc_populate_devs { + char *name; + char *type; + mode_t file_mode; + int maj; + int min; + uid_t uid; + gid_t gid; +}; +#endif + struct lxc_conf { /* Pointer to the name of the container. Do not free! 
*/ const char *name; @@ -401,11 +440,39 @@ struct lxc_conf { /* Absolute path (in the container) to the shared mount point */ char *path_cont; } shmount; + +#ifdef HAVE_ISULAD + /* + * isulad: support oci hook + * */ + oci_runtime_spec_hooks *ocihooks; + + /* isulad add: init args used to repalce init_cmd*/ + char **init_argv; + size_t init_argc; + + gid_t *init_groups; + size_t init_groups_len; + + /* populate devices*/ + struct lxc_list populate_devs; + mode_t umask; //umask value + + char *container_info_file; + + int exit_fd; /* exit fifo fd*/ + + char *errmsg; /* record error messages */ + + int errpipe[2];//pipdfd for get error message of child or grandchild process. + + char *systemd; //systemd value +#endif + }; extern int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf, - size_t buf_size) -__access_r(3, 4); + size_t buf_size); #ifdef HAVE_TLS extern thread_local struct lxc_conf *current_config; @@ -439,19 +506,22 @@ extern int lxc_setup_rootfs_prepare_root(struct lxc_conf *conf, const char *name, const char *lxcpath); extern int lxc_setup(struct lxc_handler *handler); extern int lxc_setup_parent(struct lxc_handler *handler); +#ifdef HAVE_ISULAD +extern int setup_resource_limits(struct lxc_list *limits, pid_t pid, int errfd); +#else extern int setup_resource_limits(struct lxc_list *limits, pid_t pid); +#endif extern int find_unmapped_nsid(const struct lxc_conf *conf, enum idtype idtype); extern int mapped_hostid(unsigned id, const struct lxc_conf *conf, enum idtype idtype); +extern int chown_mapped_root(const char *path, const struct lxc_conf *conf); extern int userns_exec_1(const struct lxc_conf *conf, int (*fn)(void *), void *data, const char *fn_name); extern int userns_exec_full(struct lxc_conf *conf, int (*fn)(void *), void *data, const char *fn_name); -extern int parse_mntopts(const char *mntopts, unsigned long *mntflags, - char **mntdata); extern int parse_propagationopts(const char *mntopts, unsigned long *pflags); extern void 
tmp_proc_unmount(struct lxc_conf *lxc_conf); -extern void turn_into_dependent_mounts(void); +extern void remount_all_slave(void); extern void suggest_default_idmap(void); extern FILE *make_anonymous_mount_file(struct lxc_list *mount, bool include_nesting_helpers); @@ -473,11 +543,24 @@ extern int lxc_clear_namespace(struct lxc_conf *c); extern int userns_exec_minimal(const struct lxc_conf *conf, int (*fn_parent)(void *), void *fn_parent_data, int (*fn_child)(void *), void *fn_child_data); -extern int userns_exec_mapped_root(const char *path, int path_fd, - const struct lxc_conf *conf); -static inline int chown_mapped_root(const char *path, const struct lxc_conf *conf) -{ - return userns_exec_mapped_root(path, -EBADF, conf); -} +#ifdef HAVE_ISULAD +// isulad modify +extern int parse_mntopts(const char *mntopts, unsigned long *mntflags, + unsigned long *pflags, char **mntdata); +#else +extern int parse_mntopts(const char *mntopts, unsigned long *mntflags, + char **mntdata); +#endif +#ifdef HAVE_ISULAD +// isulad add +int lxc_clear_init_args(struct lxc_conf *lxc_conf); +int lxc_clear_init_groups(struct lxc_conf *lxc_conf); +int lxc_clear_populate_devices(struct lxc_conf *c); +int lxc_clear_rootfs_masked_paths(struct lxc_conf *c); +int lxc_clear_rootfs_ro_paths(struct lxc_conf *c); +int lxc_drop_caps(struct lxc_conf *conf); +int run_oci_hooks(const char *name, const char *hookname, struct lxc_conf *conf, const char *lxcpath); +void lxc_close_error_pipe(int *errpipe); +#endif #endif /* __LXC_CONF_H */ diff --git a/src/lxc/confile.c b/src/lxc/confile.c index 4c27e7d4b..b1d101a9d 100644 --- a/src/lxc/confile.c +++ b/src/lxc/confile.c @@ -147,6 +147,18 @@ lxc_config_define(tty_dir); lxc_config_define(uts_name); lxc_config_define(sysctl); lxc_config_define(proc); +#ifdef HAVE_ISULAD +lxc_config_define(init_args); +lxc_config_define(init_groups); +lxc_config_define(populate_device); +lxc_config_define(umask); +lxc_config_define(rootfs_masked_paths); 
+lxc_config_define(rootfs_ro_paths); +lxc_config_define(systemd); +lxc_config_define(console_log_driver); +lxc_config_define(console_syslog_tag); +lxc_config_define(console_syslog_facility); +#endif /* * Important Note: @@ -259,6 +271,18 @@ static struct lxc_config_t config_jump_table[] = { { "lxc.uts.name", set_config_uts_name, get_config_uts_name, clr_config_uts_name, }, { "lxc.sysctl", set_config_sysctl, get_config_sysctl, clr_config_sysctl, }, { "lxc.proc", set_config_proc, get_config_proc, clr_config_proc, }, +#ifdef HAVE_ISULAD + { "lxc.isulad.init.args", set_config_init_args, get_config_init_args, clr_config_init_args, }, + { "lxc.isulad.init.groups", set_config_init_groups, get_config_init_groups, clr_config_init_groups, }, + { "lxc.isulad.populate.device", set_config_populate_device, get_config_populate_device, clr_config_populate_device, }, + { "lxc.isulad.umask", set_config_umask, get_config_umask, clr_config_umask, }, + { "lxc.isulad.rootfs.maskedpaths", set_config_rootfs_masked_paths, get_config_rootfs_masked_paths, clr_config_rootfs_masked_paths, }, + { "lxc.isulad.rootfs.ropaths", set_config_rootfs_ro_paths, get_config_rootfs_ro_paths, clr_config_rootfs_ro_paths, }, + { "lxc.isulad.systemd", set_config_systemd, get_config_systemd, clr_config_systemd, }, + { "lxc.console.logdriver", set_config_console_log_driver, get_config_console_log_driver, clr_config_console_log_driver, }, + { "lxc.console.syslog_tag", set_config_console_syslog_tag, get_config_console_syslog_tag, clr_config_console_syslog_tag, }, + { "lxc.console.syslog_facility", set_config_console_syslog_facility, get_config_console_syslog_facility, clr_config_console_syslog_facility, }, +#endif }; static const size_t config_jump_table_size = sizeof(config_jump_table) / sizeof(struct lxc_config_t); @@ -300,18 +324,14 @@ static int set_config_net_type(const char *key, const char *value, netdev->type = LXC_NET_VETH; lxc_list_init(&netdev->priv.veth_attr.ipv4_routes); 
lxc_list_init(&netdev->priv.veth_attr.ipv6_routes); - if (!lxc_veth_flag_to_mode(netdev->priv.veth_attr.mode)) - lxc_veth_mode_to_flag(&netdev->priv.veth_attr.mode, "bridge"); + lxc_veth_mode_to_flag(&netdev->priv.veth_attr.mode, "bridge"); } else if (strcmp(value, "macvlan") == 0) { netdev->type = LXC_NET_MACVLAN; - if (!lxc_macvlan_flag_to_mode(netdev->priv.veth_attr.mode)) - lxc_macvlan_mode_to_flag(&netdev->priv.macvlan_attr.mode, "private"); + lxc_macvlan_mode_to_flag(&netdev->priv.macvlan_attr.mode, "private"); } else if (strcmp(value, "ipvlan") == 0) { netdev->type = LXC_NET_IPVLAN; - if (!lxc_ipvlan_flag_to_mode(netdev->priv.ipvlan_attr.mode)) - lxc_ipvlan_mode_to_flag(&netdev->priv.ipvlan_attr.mode, "l3"); - if (!lxc_ipvlan_flag_to_isolation(netdev->priv.ipvlan_attr.isolation)) - lxc_ipvlan_isolation_to_flag(&netdev->priv.ipvlan_attr.isolation, "bridge"); + lxc_ipvlan_mode_to_flag(&netdev->priv.ipvlan_attr.mode, "l3"); + lxc_ipvlan_isolation_to_flag(&netdev->priv.ipvlan_attr.isolation, "bridge"); } else if (strcmp(value, "vlan") == 0) { netdev->type = LXC_NET_VLAN; } else if (strcmp(value, "phys") == 0) { @@ -1348,6 +1368,10 @@ static int set_config_environment(const char *key, const char *value, { struct lxc_list *list_item = NULL; +#ifdef HAVE_ISULAD + char *replaced = NULL; +#endif + if (lxc_config_value_empty(value)) return lxc_clear_environment(lxc_conf); @@ -1368,7 +1392,16 @@ static int set_config_environment(const char *key, const char *value, env_var[1] = env_val; list_item->elem = lxc_string_join("=", env_var, false); } else { +#ifdef HAVE_ISULAD + /* isulad: recover space replaced by SPACE_MAGIC_STR */ + replaced = lxc_string_replace(SPACE_MAGIC_STR, " ", value); + if(!replaced) + goto on_error; + + list_item->elem = replaced; +#else list_item->elem = strdup(value); +#endif } if (!list_item->elem) @@ -2291,11 +2324,14 @@ static int set_config_console_rotate(const char *key, const char *value, if (lxc_safe_uint(value, 
/*
 * escape_string_decode: collapse backslash escape sequences in a config value.
 *
 * The two-character sequences \r, \n, \f, \b, \t and \\ are each replaced by
 * the single character they denote. A backslash followed by any other
 * character is copied through unchanged together with that character, and a
 * backslash that is the last character of @src is kept as is.
 *
 * @src: NUL-terminated input string; may be NULL.
 *
 * Returns a newly allocated NUL-terminated string which the caller must
 * free(). An empty @src yields an empty string, so that NULL unambiguously
 * means "no input" or "allocation failed" and callers do not report a decode
 * failure for a legitimately empty value.
 */
static char *escape_string_decode(const char *src)
{
	size_t len;
	size_t in = 0;
	size_t out = 0;
	char *dst;

	if (src == NULL)
		return NULL;

	len = strlen(src);

	/* Decoding never produces more characters than it consumes. */
	dst = calloc(1, len + 1);
	if (dst == NULL) {
		ERROR("Out of memory");
		return NULL;
	}

	while (in < len) {
		char c = src[in++];

		if (c != '\\') {
			dst[out++] = c;
			continue;
		}

		/* src[in] is at worst the terminating NUL at this point. */
		switch (src[in]) {
		case 'r':
			dst[out++] = '\r';
			break;
		case 'n':
			dst[out++] = '\n';
			break;
		case 'f':
			dst[out++] = '\f';
			break;
		case 'b':
			dst[out++] = '\b';
			break;
		case 't':
			dst[out++] = '\t';
			break;
		case '\\':
			dst[out++] = '\\';
			break;
		default:
			/* Unknown escape: keep the backslash and what follows. */
			dst[out++] = '\\';
			if (in < len)
				dst[out++] = src[in];
			break;
		}

		in++;
	}

	/* calloc() already zeroed the tail, so dst is NUL-terminated. */
	return dst;
}
buffer; struct parse_line_conf *plc = data; +#ifdef HAVE_ISULAD + char *value_decode = NULL; +#endif /* If there are newlines in the config file we should keep them. */ empty_line = lxc_is_line_empty(dup); @@ -2796,10 +2889,21 @@ static int parse_line(char *buffer, void *data) goto on_error; } +#ifdef HAVE_ISULAD + value_decode = escape_string_decode(value); + if (value_decode == NULL) { + ERROR("Value %s decode failed", value); + } + ret = config->set(key, value_decode ? value_decode: value, plc->conf, NULL); +#else ret = config->set(key, value, plc->conf, NULL); +#endif on_error: free(linep); +#ifdef HAVE_ISULAD + free(value_decode); +#endif return ret; } @@ -4192,7 +4296,12 @@ static int get_config_prlimit(const char *key, char *retv, int inlen, lxc_list_for_each(it, &c->limits) { /* 2 colon separated 64 bit integers or the word 'unlimited' */ +#ifdef HAVE_ISULAD +#define MAX_LIMIT_BUF_LEN ((INTTYPE_TO_STRLEN(uint64_t) * 2) + 2) + char buf[MAX_LIMIT_BUF_LEN] = { 0 }; +#else char buf[INTTYPE_TO_STRLEN(uint64_t) * 2 + 2]; +#endif int partlen; struct lxc_limit *lim = it->elem; @@ -4200,17 +4309,34 @@ static int get_config_prlimit(const char *key, char *retv, int inlen, memcpy(buf, "unlimited", STRLITERALLEN("unlimited") + 1); partlen = STRLITERALLEN("unlimited"); } else { +#ifdef HAVE_ISULAD + partlen = snprintf(buf, MAX_LIMIT_BUF_LEN, "%" PRIu64, (uint64_t)lim->limit.rlim_cur); + if (partlen < 0 || partlen >= MAX_LIMIT_BUF_LEN) { + return -1; + } +#else partlen = sprintf(buf, "%" PRIu64, (uint64_t)lim->limit.rlim_cur); +#endif } if (lim->limit.rlim_cur != lim->limit.rlim_max) { if (lim->limit.rlim_max == RLIM_INFINITY) memcpy(buf + partlen, ":unlimited", STRLITERALLEN(":unlimited") + 1); +#ifdef HAVE_ISULAD + else { + int nret = snprintf(buf + partlen, (MAX_LIMIT_BUF_LEN - partlen), + ":%" PRIu64, (uint64_t)lim->limit.rlim_max); + if (nret < 0 || nret >= (MAX_LIMIT_BUF_LEN - partlen)) { + return -1; + } + } +#else else sprintf(buf + partlen, ":%" PRIu64, 
(uint64_t)lim->limit.rlim_max); +#endif } if (get_all) { @@ -6098,3 +6224,506 @@ int lxc_list_net(struct lxc_conf *c, const char *key, char *retv, int inlen) return fulllen; } + +#ifdef HAVE_ISULAD +/* isulad: set config for init args */ +static int set_config_init_args(const char *key, const char *value, + struct lxc_conf *lxc_conf, void *data) +{ + int ret = 0; + char *tmp = NULL; + char *new_value = NULL; + + ret = set_config_string_item(&new_value, value); + if (ret || !new_value) + return ret; + + tmp = realloc(lxc_conf->init_argv, (lxc_conf->init_argc + 1) * sizeof(char *)); + if (!tmp) { + ERROR("Out of memory"); + free(new_value); + return -1; + } + + lxc_conf->init_argv = (char **)tmp; + + lxc_conf->init_argv[lxc_conf->init_argc] = new_value; + lxc_conf->init_argc++; + + return 0; +} + +/* isulad: get config init args */ +static int get_config_init_args(const char *key, char *retv, int inlen, + struct lxc_conf *c, void *data) +{ + int i, len, fulllen = 0; + + if (!retv) + inlen = 0; + else + memset(retv, 0, inlen); + + for (i = 0; i < c->init_argc; i++) { + strprint(retv, inlen, "%s", c->init_argv[i]); + } + + return fulllen; +} + +/* isulad: clr config init args*/ +static inline int clr_config_init_args(const char *key, struct lxc_conf *c, + void *data) +{ + return lxc_clear_init_args(c); +} + +/* isulad: set config for init groups */ +static int set_config_init_groups(const char *key, const char *value, + struct lxc_conf *lxc_conf, void *data) +{ + char *groups = NULL; + char *token = NULL; + int ret = -1; + + if (lxc_config_value_empty(value)) + return lxc_clear_init_groups(lxc_conf); + + groups = strdup(value); + if (!groups) + return -1; + + /* In case several capability keep is specified in a single line + * split these caps in a single element for the list. 
+ */ + lxc_iterate_parts(token, groups, " \t") { + gid_t *tmp = NULL; + if (lxc_mem_realloc((void **)&tmp, (lxc_conf->init_groups_len + 1) * sizeof(gid_t), lxc_conf->init_groups, + (lxc_conf->init_groups_len) * sizeof(gid_t)) != 0) { + ERROR("Out of memory"); + goto on_error; + } + lxc_conf->init_groups = tmp; + tmp[lxc_conf->init_groups_len] = atoll(token); + lxc_conf->init_groups_len++; + } + + ret = 0; + +on_error: + free(groups); + + return ret; +} + +/* isulad: get config init groups */ +static int get_config_init_groups(const char *key, char *retv, int inlen, + struct lxc_conf *c, void *data) +{ + int i, len, fulllen = 0; + + if (!retv) + inlen = 0; + else + memset(retv, 0, inlen); + + for (i = 0; i < c->init_groups_len; i++) { + strprint(retv, inlen, "%u\n", c->init_groups[i]); + } + + return fulllen; +} + +/* isulad: clr config init args*/ +static inline int clr_config_init_groups(const char *key, struct lxc_conf *c, + void *data) +{ + return lxc_clear_init_groups(c); +} + +/* isulad: set config for populate device */ +static int set_config_populate_device(const char *key, const char *value, + struct lxc_conf *lxc_conf, void *data) +{ + int ret = 0, major = 0, minor = 0; + uid_t uid = (uid_t)-1; + gid_t gid = (gid_t)-1; + char name[4096] = {0}; /* MAX dev path name */ + char type[3] = {0}; + char *replace_value = NULL; + mode_t filemode = 0; + struct lxc_list *iter = NULL; + struct lxc_list *dev_list = NULL; + struct lxc_populate_devs *dev_elem = NULL; + + if (lxc_config_value_empty(value)) + return lxc_clear_populate_devices(lxc_conf); + + /* lxc.populate.device = PATH_IN_CONTAINER:DEVICETYPE:MAJOR:MINOR:MODE:UID:GID + * For e.g. 
lxc.populate.device = /dev/sda:b:8:0:0666:0:0 + */ + ret = sscanf(value, "%4095[^:]:%2[^:]:%i:%i:%i:%u:%u", name, type, &major, &minor, &filemode, &uid, &gid); + if (ret != 7) + return -1; + + /* find existing list element */ + lxc_list_for_each(iter, &lxc_conf->populate_devs) { + dev_elem = iter->elem; + + if (strcmp(name, dev_elem->name) != 0) + continue; + + replace_value = safe_strdup(type); + + free(dev_elem->type); + dev_elem->type = replace_value; + dev_elem->file_mode = filemode; + dev_elem->maj = major; + dev_elem->min = minor; + dev_elem->uid = (uid_t)uid; + dev_elem->gid = (gid_t)gid; + return 0; + } + + /* allocate list element */ + dev_list = malloc(sizeof(*dev_list)); + if (dev_list == NULL) + goto on_error; + + lxc_list_init(dev_list); + + dev_elem = malloc(sizeof(*dev_elem)); + if (dev_elem == NULL) + goto on_error; + memset(dev_elem, 0, sizeof(*dev_elem)); + + dev_elem->name = safe_strdup(name); + + dev_elem->type = safe_strdup(type); + + dev_elem->file_mode = filemode; + dev_elem->maj = major; + dev_elem->min = minor; + dev_elem->uid = (uid_t)uid; + dev_elem->gid = (gid_t)gid; + + lxc_list_add_elem(dev_list, dev_elem); + + lxc_list_add_tail(&lxc_conf->populate_devs, dev_list); + + return 0; + +on_error: + free(dev_list); + if (dev_elem) { + free(dev_elem->name); + free(dev_elem->type); + free(dev_elem); + } + return -1; +} + +/* isulad: get config populate device + * If you ask for 'lxc.populate.device', then all populate device + * entries will be printed, in 'lxc.populate.device = path_in_container:type:major:minor:mode:uid:gid' format. + * For e.g. 
lxc.populate.device = /dev/sda:b:8:0:0666:0:0 + */ +static int get_config_populate_device(const char *key, char *retv, int inlen, + struct lxc_conf *c, void *data) +{ + int len; + struct lxc_list *it = NULL; + int fulllen = 0; + + if (!retv) + inlen = 0; + else + memset(retv, 0, inlen); + + lxc_list_for_each(it, &c->populate_devs) { + struct lxc_populate_devs *elem = it->elem; + strprint(retv, inlen, "lxc.populate.device = %s:%s:%d:%d:%o:%u:%u\n", + elem->name, elem->type, elem->maj, + elem->min, elem->file_mode, elem->uid, elem->gid); + } + + return fulllen; +} + +/* isulad: clr config populate devices*/ +static inline int clr_config_populate_device(const char *key, struct lxc_conf *c, + void *data) +{ + return lxc_clear_populate_devices(c); +} + +/* isulad: set config for umask */ +static int set_config_umask(const char *key, const char *value, + struct lxc_conf *lxc_conf, void *data) +{ + if (lxc_config_value_empty(value)) { + ERROR("Empty umask"); + return -1; + } + + if (strcmp(value, "normal") == 0) { + lxc_conf->umask = 0022; + return 0; + } else if (strcmp(value, "secure") == 0) { + lxc_conf->umask = 0027; + return 0; + } else { + ERROR("Invalid native umask: %s", value); + return -1; + } +} + +/* isulad add: get umask value*/ +static int get_config_umask(const char *key, char *retv, int inlen, + struct lxc_conf *c, void *data) +{ + return lxc_get_conf_size_t(c, retv, inlen, c->umask); +} + +/* isulad add: clear umask value */ +static inline int clr_config_umask(const char *key, struct lxc_conf *c, + void *data) +{ + c->umask = 0027; + return 0; +} + +/* isulad: set config for rootfs masked paths */ +static int set_config_rootfs_masked_paths(const char *key, const char *value, + struct lxc_conf *lxc_conf, void *data) +{ + struct lxc_list *list_item = NULL; + + if (lxc_config_value_empty(value)) + return lxc_clear_rootfs_masked_paths(lxc_conf); + + list_item = malloc(sizeof(*list_item)); + if (list_item == NULL) + goto on_error; + + list_item->elem = 
safe_strdup(value); + + lxc_list_add_tail(&lxc_conf->rootfs.maskedpaths, list_item); + + return 0; + +on_error: + free(list_item); + + return -1; +} + +// isulad: get config rootfs masked paths +static int get_config_rootfs_masked_paths(const char *key, char *retv, int inlen, + struct lxc_conf *c, void *data) +{ + int len, fulllen = 0; + struct lxc_list *it = NULL; + + if (!retv) + inlen = 0; + else + memset(retv, 0, inlen); + + lxc_list_for_each(it, &c->rootfs.maskedpaths) { + strprint(retv, inlen, "%s\n", (char *)it->elem); + } + + return fulllen; +} + +/* isulad: set config for rootfs ro paths */ +static int set_config_rootfs_ro_paths(const char *key, const char *value, + struct lxc_conf *lxc_conf, void *data) +{ + struct lxc_list *list_item = NULL; + + if (lxc_config_value_empty(value)) + return lxc_clear_rootfs_ro_paths(lxc_conf); + + list_item = malloc(sizeof(*list_item)); + if (list_item == NULL) + goto on_error; + + list_item->elem = safe_strdup(value); + + lxc_list_add_tail(&lxc_conf->rootfs.ropaths, list_item); + + return 0; + +on_error: + free(list_item); + + return -1; +} + +// isulad: get config rootfs ro paths +static int get_config_rootfs_ro_paths(const char *key, char *retv, int inlen, + struct lxc_conf *c, void *data) +{ + int len, fulllen = 0; + struct lxc_list *it = NULL; + + if (!retv) + inlen = 0; + else + memset(retv, 0, inlen); + + lxc_list_for_each(it, &c->rootfs.ropaths) { + strprint(retv, inlen, "%s\n", (char *)it->elem); + } + + return fulllen; +} + +/* isulad: clr config rootfs masked paths */ +static inline int clr_config_rootfs_masked_paths(const char *key, struct lxc_conf *c, + void *data) +{ + return lxc_clear_rootfs_masked_paths(c); +} + +/* isulad: clr config rootfs ro paths */ +static inline int clr_config_rootfs_ro_paths(const char *key, struct lxc_conf *c, + void *data) +{ + return lxc_clear_rootfs_ro_paths(c); +} + +/* isulad: set config for systemd */ +static int set_config_systemd(const char *key, const char *value, + struct 
/*
 * Translate a syslog facility keyword into the matching LOG_* constant.
 *
 * @facility: one of "kern", "user", "mail", "daemon", "auth", "syslog",
 *            "lpr", "news", "uucp", "cron", "authpriv", "ftp" or
 *            "local0" .. "local7". The comparison is exact and
 *            case-sensitive. May be NULL.
 *
 * Returns the facility value on a match, or -1 when @facility is NULL or is
 * not one of the keywords above.
 */
static int parse_facility(const char *facility)
{
	static const struct {
		const char *keyword;
		int code;
	} known[] = {
		{ "kern",     LOG_KERN     },
		{ "user",     LOG_USER     },
		{ "mail",     LOG_MAIL     },
		{ "daemon",   LOG_DAEMON   },
		{ "auth",     LOG_AUTH     },
		{ "syslog",   LOG_SYSLOG   },
		{ "lpr",      LOG_LPR      },
		{ "news",     LOG_NEWS     },
		{ "uucp",     LOG_UUCP     },
		{ "cron",     LOG_CRON     },
		{ "authpriv", LOG_AUTHPRIV },
		{ "ftp",      LOG_FTP      },
		{ "local0",   LOG_LOCAL0   },
		{ "local1",   LOG_LOCAL1   },
		{ "local2",   LOG_LOCAL2   },
		{ "local3",   LOG_LOCAL3   },
		{ "local4",   LOG_LOCAL4   },
		{ "local5",   LOG_LOCAL5   },
		{ "local6",   LOG_LOCAL6   },
		{ "local7",   LOG_LOCAL7   },
	};
	size_t idx;

	if (facility == NULL)
		return -1;

	for (idx = 0; idx < sizeof(known) / sizeof(known[0]); idx++) {
		if (strcmp(facility, known[idx].keyword) == 0)
			return known[idx].code;
	}

	return -1;
}
facility = parse_facility(value); + if (facility < 0) { + NOTICE("Invalid facility: %s", value); + facility = LOG_DAEMON; + } + + lxc_conf->console.log_syslog_facility = facility; + return 0; +} + +static int get_config_console_log_driver(const char *key, char *retv, int inlen, + struct lxc_conf *c, void *data) +{ + return lxc_get_conf_str(retv, inlen, c->console.log_driver); +} + +static int get_config_console_syslog_tag(const char *key, char *retv, int inlen, + struct lxc_conf *c, void *data) +{ + return lxc_get_conf_str(retv, inlen, c->console.log_syslog_tag); +} + +static int get_config_console_syslog_facility(const char *key, char *retv, int inlen, + struct lxc_conf *c, void *data) +{ + return lxc_get_conf_int(c, retv, inlen, c->console.log_syslog_facility); +} + +static inline int clr_config_console_log_driver(const char *key, + struct lxc_conf *c, void *data) +{ + free(c->console.log_driver); + c->console.log_driver = NULL; + return 0; +} + +static inline int clr_config_console_syslog_tag(const char *key, + struct lxc_conf *c, void *data) +{ + free(c->console.log_syslog_tag); + c->console.log_syslog_tag= NULL; + return 0; +} + +static inline int clr_config_console_syslog_facility(const char *key, + struct lxc_conf *c, void *data) +{ + c->console.log_syslog_facility = LOG_DAEMON; + return 0; +} + +#endif diff --git a/src/lxc/confile.h b/src/lxc/confile.h index a457c9a17..624d9a0c2 100644 --- a/src/lxc/confile.h +++ b/src/lxc/confile.h @@ -9,8 +9,6 @@ #include #include -#include "compiler.h" - struct lxc_conf; struct lxc_list; @@ -48,24 +46,21 @@ struct new_config_item { extern struct lxc_config_t *lxc_get_config(const char *key); /* List all available config items. */ -extern int lxc_list_config_items(char *retv, int inlen) -__access_rw(1, 2); +extern int lxc_list_config_items(char *retv, int inlen); /* Given a configuration key namespace (e.g. lxc.apparmor) list all associated * subkeys for that namespace. 
* Must be implemented when adding a new configuration key. */ extern int lxc_list_subkeys(struct lxc_conf *conf, const char *key, char *retv, - int inlen) -__access_rw(3, 4); + int inlen); /* List all configuration items associated with a given network. For example * pass "lxc.net.[i]" to retrieve all configuration items associated with * the network associated with index [i]. */ extern int lxc_list_net(struct lxc_conf *c, const char *key, char *retv, - int inlen) -__access_rw(3, 4); + int inlen); extern int lxc_config_read(const char *file, struct lxc_conf *conf, bool from_include); diff --git a/src/lxc/confile_utils.c b/src/lxc/confile_utils.c index 05dadf9ec..ff4ae7688 100644 --- a/src/lxc/confile_utils.c +++ b/src/lxc/confile_utils.c @@ -506,18 +506,6 @@ int lxc_veth_mode_to_flag(int *mode, const char *value) return ret_set_errno(-1, EINVAL); } -char *lxc_veth_flag_to_mode(int mode) -{ - for (size_t i = 0; i < sizeof(veth_mode) / sizeof(veth_mode[0]); i++) { - if (veth_mode[i].mode != mode) - continue; - - return veth_mode[i].name; - } - - return NULL; -} - static struct lxc_macvlan_mode { char *name; int mode; diff --git a/src/lxc/confile_utils.h b/src/lxc/confile_utils.h index 7c59deae5..62990e98c 100644 --- a/src/lxc/confile_utils.h +++ b/src/lxc/confile_utils.h @@ -5,7 +5,6 @@ #include -#include "compiler.h" #include "conf.h" #include "confile_utils.h" @@ -41,7 +40,6 @@ extern void lxc_log_configured_netdevs(const struct lxc_conf *conf); extern bool lxc_remove_nic_by_idx(struct lxc_conf *conf, unsigned int idx); extern void lxc_free_networks(struct lxc_list *networks); extern int lxc_veth_mode_to_flag(int *mode, const char *value); -extern char *lxc_veth_flag_to_mode(int mode); extern int lxc_macvlan_mode_to_flag(int *mode, const char *value); extern char *lxc_macvlan_flag_to_mode(int mode); extern int lxc_ipvlan_mode_to_flag(int *mode, const char *value); @@ -51,16 +49,12 @@ extern char *lxc_ipvlan_flag_to_isolation(int mode); extern int 
set_config_string_item(char **conf_item, const char *value); extern int set_config_string_item_max(char **conf_item, const char *value, - size_t max) -__access_r(2, 3); - + size_t max); extern int set_config_path_item(char **conf_item, const char *value); extern int set_config_bool_item(bool *conf_item, const char *value, bool empty_conf_action); extern int config_ip_prefix(struct in_addr *addr); -extern int network_ifname(char *valuep, const char *value, size_t size) -__access_r(2, 3); - +extern int network_ifname(char *valuep, const char *value, size_t size); extern void rand_complete_hwaddr(char *hwaddr); extern bool lxc_config_net_is_hwaddr(const char *line); extern bool new_hwaddr(char *hwaddr); diff --git a/src/lxc/criu.c b/src/lxc/criu.c index 19f2a173f..14a8aae7d 100644 --- a/src/lxc/criu.c +++ b/src/lxc/criu.c @@ -303,7 +303,7 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf, * the handler the restore task created. */ if (!strcmp(opts->action, "dump") || !strcmp(opts->action, "pre-dump")) { - path = lxc_cmd_get_limiting_cgroup_path(opts->c->name, opts->c->config_path, controllers[0]); + path = lxc_cmd_get_cgroup_path(opts->c->name, opts->c->config_path, controllers[0]); if (!path) { ERROR("failed to get cgroup path for %s", controllers[0]); goto err; @@ -311,7 +311,7 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf, } else { const char *p; - p = cgroup_ops->get_limiting_cgroup(cgroup_ops, controllers[0]); + p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0]); if (!p) { ERROR("failed to get cgroup path for %s", controllers[0]); goto err; @@ -371,8 +371,15 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf, char *mntdata = NULL; char arg[2 * PATH_MAX + 2]; +#ifdef HAVE_ISULAD + unsigned long pflags; + + if (parse_mntopts(mntent.mnt_opts, &flags, &pflags, &mntdata) < 0) + goto err; +#else if (parse_mntopts(mntent.mnt_opts, &flags, &mntdata) < 0) goto err; +#endif 
free(mntdata); @@ -406,9 +413,9 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf, DECLARE_ARG("-t"); DECLARE_ARG(pid); - freezer_relative = lxc_cmd_get_limiting_cgroup_path(opts->c->name, - opts->c->config_path, - "freezer"); + freezer_relative = lxc_cmd_get_cgroup_path(opts->c->name, + opts->c->config_path, + "freezer"); if (!freezer_relative) { ERROR("failed getting freezer path"); goto err; @@ -942,7 +949,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_ close(fd); } - handler = lxc_init_handler(NULL, c->name, c->lxc_conf, c->config_path, false); + handler = lxc_init_handler(c->name, c->lxc_conf, c->config_path, false); if (!handler) goto out; @@ -1011,7 +1018,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_ } if (mount(rootfs->path, rootfs->mount, NULL, MS_BIND, NULL) < 0) { - (void)rmdir(rootfs->mount); + rmdir(rootfs->mount); goto out_fini_handler; } } @@ -1020,7 +1027,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_ os.action = "restore"; os.user = opts; os.c = c; - os.console_fd = c->lxc_conf->console.pts; + os.console_fd = c->lxc_conf->console.slave; os.criu_version = criu_version; os.handler = handler; @@ -1046,7 +1053,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_ /* exec_criu() returning is an error */ exec_criu(cgroup_ops, c->lxc_conf, &os); umount(rootfs->mount); - (void)rmdir(rootfs->mount); + rmdir(rootfs->mount); goto out_fini_handler; } else { char title[2048]; @@ -1323,7 +1330,7 @@ static bool do_dump(struct lxc_container *c, char *mode, struct migrate_opts *op fail: close(criuout[0]); close(criuout[1]); - (void)rmdir(opts->directory); + rmdir(opts->directory); free(criu_version); return false; } diff --git a/src/lxc/exec_commands.c b/src/lxc/exec_commands.c new file mode 100644 index 000000000..00129cb0e --- /dev/null +++ b/src/lxc/exec_commands.c @@ -0,0 +1,416 @@ 
+/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. + * Author: lifeng + * Create: 2019-12-08 + * Description: provide container definition + * lxc: linux Container library + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + ******************************************************************************/ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "af_unix.h" +#include "cgroup.h" +#include "exec_commands.h" +#include "commands_utils.h" +#include "conf.h" +#include "config.h" +#include "confile.h" +#include "log.h" +#include "lxc.h" +#include "lxclock.h" +#include "mainloop.h" +#include "monitor.h" +#include "terminal.h" +#include "utils.h" + +lxc_log_define(commands_exec, lxc); + +static const char *lxc_exec_cmd_str(lxc_exec_cmd_t cmd) +{ + static const char *const cmdname[LXC_EXEC_CMD_MAX] = { + [LXC_EXEC_CMD_SET_TERMINAL_WINCH] = "set_exec_terminal_winch", + }; + + if (cmd >= LXC_EXEC_CMD_MAX) + return "Invalid request"; + + return cmdname[cmd]; +} + +static int lxc_exec_cmd_rsp_recv(int sock, struct lxc_exec_cmd_rr *cmd) +{ + int ret, 
rspfd; + struct lxc_exec_cmd_rsp *rsp = &cmd->rsp; + + ret = lxc_abstract_unix_recv_fds_timeout(sock, &rspfd, 1, rsp, sizeof(*rsp), 1000 * 1000); + if (ret < 0) { + SYSERROR("Failed to receive response for command \"%s\"", + lxc_exec_cmd_str(cmd->req.cmd)); + + if (errno == ECONNRESET || errno == EAGAIN || errno == EWOULDBLOCK) { + errno = ECONNRESET; /*isulad set errno ECONNRESET when timeout */ + return -1; + } + + return -1; + } + TRACE("Command \"%s\" received response", lxc_exec_cmd_str(cmd->req.cmd)); + + if (rsp->datalen == 0) { + DEBUG("Response data length for command \"%s\" is 0", + lxc_exec_cmd_str(cmd->req.cmd)); + return ret; + } + + if (rsp->datalen > LXC_CMD_DATA_MAX) { + ERROR("Response data for command \"%s\" is too long: %d bytes > %d", + lxc_exec_cmd_str(cmd->req.cmd), rsp->datalen, LXC_CMD_DATA_MAX); + return -1; + } + + rsp->data = malloc(rsp->datalen); + if (!rsp->data) { + errno = ENOMEM; + ERROR("Failed to allocate response buffer for command \"%s\"", + lxc_exec_cmd_str(cmd->req.cmd)); + return -1; + } + + ret = lxc_recv_nointr(sock, rsp->data, rsp->datalen, 0); + if (ret != rsp->datalen) { + SYSERROR("Failed to receive response data for command \"%s\"", + lxc_exec_cmd_str(cmd->req.cmd)); + return -1; + } + + return ret; +} + +static int lxc_exec_cmd_rsp_send(int fd, struct lxc_exec_cmd_rsp *rsp) +{ + ssize_t ret; + + errno = EMSGSIZE; + ret = lxc_send_nointr(fd, rsp, sizeof(*rsp), MSG_NOSIGNAL); + if (ret < 0 || (size_t)ret != sizeof(*rsp)) { + SYSERROR("Failed to send command response %zd", ret); + return -1; + } + + if (!rsp->data || rsp->datalen <= 0) + return 0; + + errno = EMSGSIZE; + ret = lxc_send_nointr(fd, rsp->data, rsp->datalen, MSG_NOSIGNAL); + if (ret < 0 || ret != (ssize_t)rsp->datalen) { + SYSWARN("Failed to send command response data %zd", ret); + return -1; + } + + return 0; +} + +static int lxc_exec_cmd_send(const char *name, struct lxc_exec_cmd_rr *cmd, + const char *lxcpath, const char *hashed_sock_name, const char 
*suffix) +{ + int client_fd, saved_errno; + ssize_t ret = -1; + + client_fd = lxc_cmd_connect(name, lxcpath, hashed_sock_name, suffix); + if (client_fd < 0) + return -1; + + ret = lxc_abstract_unix_send_credential(client_fd, &cmd->req, + sizeof(cmd->req)); + if (ret < 0 || (size_t)ret != sizeof(cmd->req)) + goto on_error; + + if (cmd->req.datalen <= 0) + return client_fd; + + errno = EMSGSIZE; + ret = lxc_send_nointr(client_fd, (void *)cmd->req.data, + cmd->req.datalen, MSG_NOSIGNAL); + if (ret < 0 || ret != (ssize_t)cmd->req.datalen) + goto on_error; + + return client_fd; + +on_error: + saved_errno = errno; + close(client_fd); + errno = saved_errno; + + return -1; +} + +static int lxc_exec_cmd(const char *name, struct lxc_exec_cmd_rr *cmd, const char *lxcpath, const char *hashed_sock_name, const char *suffix) +{ + int client_fd = -1; + int saved_errno; + int ret = -1; + + client_fd = lxc_exec_cmd_send(name, cmd, lxcpath, hashed_sock_name, suffix); + if (client_fd < 0) { + SYSTRACE("Command \"%s\" failed to connect command socket", + lxc_exec_cmd_str(cmd->req.cmd)); + return -1; + } + + ret = lxc_exec_cmd_rsp_recv(client_fd, cmd); + + saved_errno = errno; + close(client_fd); + errno = saved_errno; + return ret; +} + +int lxc_exec_cmd_set_terminal_winch(const char *name, const char *lxcpath, const char *suffix, unsigned int height, unsigned int width) +{ + int ret = 0; + struct lxc_exec_cmd_set_terminal_winch_request data = { 0 }; + + data.height = height; + data.width = width; + + struct lxc_exec_cmd_rr cmd = { + .req = { + .cmd = LXC_EXEC_CMD_SET_TERMINAL_WINCH, + .datalen = sizeof(struct lxc_exec_cmd_set_terminal_winch_request), + .data = &data, + }, + }; + + ret = lxc_exec_cmd(name, &cmd, lxcpath, NULL, suffix); + if (ret < 0) { + ERROR("Failed to send command to container"); + return -1; + } + + if (cmd.rsp.ret != 0) { + ERROR("Command response error:%d", cmd.rsp.ret); + return -1; + } + return 0; +} + +static int lxc_exec_cmd_set_terminal_winch_callback(int 
fd, struct lxc_exec_cmd_req *req, + struct lxc_exec_command_handler *handler) +{ + struct lxc_exec_cmd_rsp rsp; + struct lxc_exec_cmd_set_terminal_winch_request *data = (struct lxc_exec_cmd_set_terminal_winch_request *)(req->data); + memset(&rsp, 0, sizeof(rsp)); + + rsp.ret = lxc_set_terminal_winsz(handler->terminal, data->height, data->width);; + + return lxc_exec_cmd_rsp_send(fd, &rsp); + +} + +static int lxc_exec_cmd_process(int fd, struct lxc_exec_cmd_req *req, + struct lxc_exec_command_handler *handler) +{ + typedef int (*callback)(int, struct lxc_exec_cmd_req *, struct lxc_exec_command_handler *); + + callback cb[LXC_EXEC_CMD_MAX] = { + [LXC_EXEC_CMD_SET_TERMINAL_WINCH] = lxc_exec_cmd_set_terminal_winch_callback, + }; + + if (req->cmd >= LXC_EXEC_CMD_MAX) { + ERROR("Undefined command id %d", req->cmd); + return -1; + } + return cb[req->cmd](fd, req, handler); +} + +static void lxc_exec_cmd_fd_cleanup(int fd, struct lxc_epoll_descr *descr) +{ + lxc_mainloop_del_handler(descr, fd); + close(fd); + return; +} + +static int lxc_exec_cmd_handler(int fd, uint32_t events, void *data, + struct lxc_epoll_descr *descr) +{ + int ret; + struct lxc_exec_cmd_req req; + void *reqdata = NULL; + struct lxc_exec_command_handler *handler = data; + + ret = lxc_abstract_unix_rcv_credential(fd, &req, sizeof(req)); + if (ret < 0) { + SYSERROR("Failed to receive data on command socket for command " + "\"%s\"", lxc_exec_cmd_str(req.cmd)); + + if (errno == EACCES) { + /* We don't care for the peer, just send and close. */ + struct lxc_exec_cmd_rsp rsp = {.ret = ret}; + + lxc_exec_cmd_rsp_send(fd, &rsp); + } + + goto out_close; + } + + if (ret == 0) + goto out_close; + + if (ret != sizeof(req)) { + WARN("Failed to receive full command request. 
Ignoring request " + "for \"%s\"", lxc_exec_cmd_str(req.cmd)); + ret = -1; + goto out_close; + } + + if (req.datalen > LXC_CMD_DATA_MAX) { + ERROR("Received command data length %d is too large for " + "command \"%s\"", req.datalen, lxc_exec_cmd_str(req.cmd)); + errno = EFBIG; + ret = -EFBIG; + goto out_close; + } + + if (req.datalen > 0) { + reqdata = alloca(req.datalen); + if (!reqdata) { + ERROR("Failed to allocate memory for \"%s\" command", + lxc_exec_cmd_str(req.cmd)); + errno = ENOMEM; + ret = -ENOMEM; + goto out_close; + } + + ret = lxc_recv_nointr(fd, reqdata, req.datalen, 0); + if (ret != req.datalen) { + WARN("Failed to receive full command request. Ignoring " + "request for \"%s\"", lxc_exec_cmd_str(req.cmd)); + ret = LXC_MAINLOOP_ERROR; + goto out_close; + } + + req.data = reqdata; + } + + ret = lxc_exec_cmd_process(fd, &req, handler); + if (ret) { + /* This is not an error, but only a request to close fd. */ + ret = LXC_MAINLOOP_CONTINUE; + goto out_close; + } + +out: + return ret; + +out_close: + lxc_exec_cmd_fd_cleanup(fd, descr); + goto out; +} + +static int lxc_exec_cmd_accept(int fd, uint32_t events, void *data, + struct lxc_epoll_descr *descr) +{ + int connection = -1; + int opt = 1, ret = -1; + + connection = accept(fd, NULL, 0); + if (connection < 0) { + SYSERROR("Failed to accept connection to run command"); + return LXC_MAINLOOP_ERROR; + } + + ret = fcntl(connection, F_SETFD, FD_CLOEXEC); + if (ret < 0) { + SYSERROR("Failed to set close-on-exec on incoming command connection"); + goto out_close; + } + + ret = setsockopt(connection, SOL_SOCKET, SO_PASSCRED, &opt, sizeof(opt)); + if (ret < 0) { + SYSERROR("Failed to enable necessary credentials on command socket"); + goto out_close; + } + + ret = lxc_mainloop_add_handler(descr, connection, lxc_exec_cmd_handler, data); + if (ret) { + ERROR("Failed to add command handler"); + goto out_close; + } + +out: + return ret; + +out_close: + close(connection); + goto out; +} + +int lxc_exec_cmd_init(const 
char *name, const char *lxcpath, const char *suffix) +{ + int fd, ret; + char path[LXC_AUDS_ADDR_LEN] = {0}; + + ret = lxc_make_abstract_socket_name(path, sizeof(path), name, lxcpath, NULL, suffix); + if (ret < 0) + return -1; + TRACE("Creating abstract unix socket \"%s\"", &path[1]); + + fd = lxc_abstract_unix_open(path, SOCK_STREAM, 0); + if (fd < 0) { + SYSERROR("Failed to create command socket %s", &path[1]); + if (errno == EADDRINUSE) + ERROR("Container \"%s\" appears to be already running", name); + + return -1; + } + + ret = fcntl(fd, F_SETFD, FD_CLOEXEC); + if (ret < 0) { + SYSERROR("Failed to set FD_CLOEXEC on command socket file descriptor"); + close(fd); + return -1; + } + + return fd; +} + +int lxc_exec_cmd_mainloop_add(struct lxc_epoll_descr *descr, struct lxc_exec_command_handler *handler) +{ + int ret; + int fd = handler->maincmd_fd; + + ret = lxc_mainloop_add_handler(descr, fd, lxc_exec_cmd_accept, handler); + if (ret < 0) { + ERROR("Failed to add handler for command socket"); + close(fd); + } + + return ret; +} diff --git a/src/lxc/exec_commands.h b/src/lxc/exec_commands.h new file mode 100644 index 000000000..2581ee903 --- /dev/null +++ b/src/lxc/exec_commands.h @@ -0,0 +1,73 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. + * Author: lifeng + * Create: 2019-12-08 + * Description: provide container definition + * lxc: linux Container library + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + ******************************************************************************/ + +#ifndef __LXC_EXEC_COMMANDS_H +#define __LXC_EXEC_COMMANDS_H + +#include +#include +#include + +#include "lxccontainer.h" +#include "macro.h" +#include "state.h" +#include "terminal.h" + +struct lxc_exec_command_handler { + int maincmd_fd; + struct lxc_terminal *terminal; +}; + +typedef enum { + LXC_EXEC_CMD_SET_TERMINAL_WINCH, + LXC_EXEC_CMD_MAX, +} lxc_exec_cmd_t; + +struct lxc_exec_cmd_req { + lxc_exec_cmd_t cmd; + int datalen; + const void *data; +}; + +struct lxc_exec_cmd_rsp { + int ret; /* 0 on success, -errno on failure */ + int datalen; + void *data; +}; + +struct lxc_exec_cmd_rr { + struct lxc_exec_cmd_req req; + struct lxc_exec_cmd_rsp rsp; +}; + +struct lxc_exec_cmd_set_terminal_winch_request { + unsigned int height; + unsigned int width; +}; + +struct lxc_epoll_descr; +struct lxc_handler; + +extern int lxc_exec_cmd_init(const char *name, const char *lxcpath, const char *suffix); +extern int lxc_exec_cmd_mainloop_add(struct lxc_epoll_descr *descr, struct lxc_exec_command_handler *handler); +extern int lxc_exec_cmd_set_terminal_winch(const char *name, const char *lxcpath, const char *suffix, unsigned int height, unsigned int width); + +#endif /* __exec_commands_h */ diff --git a/src/lxc/execute.c b/src/lxc/execute.c index 7175ef2cf..16c0fed05 100644 --- a/src/lxc/execute.c +++ b/src/lxc/execute.c @@ -14,12 +14,16 @@ #include "config.h" #include "log.h" #include "start.h" -#include "process_utils.h" +#include "raw_syscalls.h" #include "utils.h" lxc_log_define(execute, start); +#ifdef HAVE_ISULAD +static int execute_start(struct lxc_handler *handler, void* data, int fd) +#else static int 
execute_start(struct lxc_handler *handler, void* data) +#endif { int argc_add, j; char **argv; @@ -66,11 +70,14 @@ static int execute_start(struct lxc_handler *handler, void* data) NOTICE("Exec'ing \"%s\"", my_args->argv[0]); if (my_args->init_fd >= 0) - execveat(my_args->init_fd, "", argv, environ, AT_EMPTY_PATH); + lxc_raw_execveat(my_args->init_fd, "", argv, environ, AT_EMPTY_PATH); else execvp(argv[0], argv); SYSERROR("Failed to exec %s", argv[0]); +#ifdef HAVE_ISULAD + lxc_write_error_message(fd, "Failed to exec: \"%s\": %s.", argv[0], strerror(errno)); +#endif free(argv); out1: return 1; @@ -88,14 +95,26 @@ static struct lxc_operations execute_start_ops = { .post_start = execute_post_start }; +#ifdef HAVE_ISULAD +int lxc_execute(const char *name, char *const argv[], int quiet, + struct lxc_handler *handler, const char *lxcpath, + bool daemonize, int *error_num, unsigned int start_timeout) +#else int lxc_execute(const char *name, char *const argv[], int quiet, struct lxc_handler *handler, const char *lxcpath, bool daemonize, int *error_num) +#endif { + struct execute_args args = {.argv = argv, .quiet = quiet}; TRACE("Doing lxc_execute"); handler->conf->is_execute = true; +#ifdef HAVE_ISULAD + return __lxc_start(handler, &execute_start_ops, &args, lxcpath, + daemonize, error_num, start_timeout); +#else return __lxc_start(handler, &execute_start_ops, &args, lxcpath, daemonize, error_num); +#endif } diff --git a/src/lxc/file_utils.h b/src/lxc/file_utils.h index f9c8abe03..6d5dbf68d 100644 --- a/src/lxc/file_utils.h +++ b/src/lxc/file_utils.h @@ -12,52 +12,27 @@ #include #include -#include "compiler.h" - /* read and write whole files */ extern int lxc_write_to_file(const char *filename, const void *buf, - size_t count, bool add_newline, mode_t mode) -__access_r(2, 3); - -extern int lxc_readat(int dirfd, const char *filename, void *buf, size_t count) -__access_w(3, 4); - + size_t count, bool add_newline, mode_t mode); +extern int lxc_readat(int dirfd, const char 
*filename, void *buf, size_t count); extern int lxc_writeat(int dirfd, const char *filename, const void *buf, - size_t count) -__access_r(3, 4); - + size_t count); extern int lxc_write_openat(const char *dir, const char *filename, - const void *buf, size_t count) -__access_r(3, 4); - -extern int lxc_read_from_file(const char *filename, void *buf, size_t count) -__access_w(2, 3); + const void *buf, size_t count); +extern int lxc_read_from_file(const char *filename, void *buf, size_t count); /* send and receive buffers completely */ -extern ssize_t lxc_write_nointr(int fd, const void *buf, size_t count) -__access_r(2, 3); - +extern ssize_t lxc_write_nointr(int fd, const void *buf, size_t count); extern ssize_t lxc_pwrite_nointr(int fd, const void *buf, size_t count, - off_t offset) -__access_r(2, 3); - -extern ssize_t lxc_send_nointr(int sockfd, void *buf, size_t len, int flags) -__access_r(2, 3); - -extern ssize_t lxc_read_nointr(int fd, void *buf, size_t count) -__access_w(2, 3); - + off_t offset); +extern ssize_t lxc_send_nointr(int sockfd, void *buf, size_t len, int flags); +extern ssize_t lxc_read_nointr(int fd, void *buf, size_t count); extern ssize_t lxc_read_nointr_expect(int fd, void *buf, size_t count, - const void *expected_buf) -__access_w(2, 3); - + const void *expected_buf); extern ssize_t lxc_read_file_expect(const char *path, void *buf, size_t count, - const void *expected_buf) -__access_w(2, 3); - -extern ssize_t lxc_recv_nointr(int sockfd, void *buf, size_t len, int flags) -__access_w(2, 3); - + const void *expected_buf); +extern ssize_t lxc_recv_nointr(int sockfd, void *buf, size_t len, int flags); ssize_t lxc_recvmsg_nointr_iov(int sockfd, struct iovec *iov, size_t iovlen, int flags); diff --git a/src/lxc/initutils.c b/src/lxc/initutils.c index 5549c2e8f..76f00488a 100644 --- a/src/lxc/initutils.c +++ b/src/lxc/initutils.c @@ -54,11 +54,15 @@ const char *lxc_global_config_value(const char *option_name) { NULL, NULL }, }; +#ifdef HAVE_ISULAD + 
static const char *values[sizeof(options) / sizeof(options[0])] = {0}; +#else /* placed in the thread local storage pool for non-bionic targets */ #ifdef HAVE_TLS static thread_local const char *values[sizeof(options) / sizeof(options[0])] = {0}; #else static const char *values[sizeof(options) / sizeof(options[0])] = {0}; +#endif #endif /* user_config_path is freed as soon as it is used */ diff --git a/src/lxc/isulad_utils.c b/src/lxc/isulad_utils.c new file mode 100644 index 000000000..b2824045c --- /dev/null +++ b/src/lxc/isulad_utils.c @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2020. Allrights reserved + * Description: isulad utils + * Author: lifeng + * Create: 2020-04-11 +******************************************************************************/ + +#include +#include +#include +#include +#include + +#include "isulad_utils.h" +#include "log.h" +#include "path.h" +#include "file_utils.h" + +lxc_log_define(isulad_utils, lxc); + +void *lxc_common_calloc_s(size_t size) +{ + if (size == 0 || size > SIZE_MAX) { + return NULL; + } + + return calloc((size_t)1, size); +} + +int lxc_mem_realloc(void **newptr, size_t newsize, void *oldptr, size_t oldsize) +{ + void *tmp = NULL; + + if (newsize == 0) { + goto err_out; + } + + tmp = lxc_common_calloc_s(newsize); + if (tmp == NULL) { + ERROR("Failed to malloc memory"); + goto err_out; + } + + if (oldptr != NULL) { + memcpy(tmp, oldptr, (newsize < oldsize) ? 
newsize : oldsize); + + memset(oldptr, 0, oldsize); + + free(oldptr); + } + + *newptr = tmp; + return 0; + +err_out: + return -1; +} + +char *safe_strdup(const char *src) +{ + char *dst = NULL; + + if (src == NULL) { + return NULL; + } + + dst = strdup(src); + if (dst == NULL) { + abort(); + } + + return dst; +} + +int lxc_open(const char *filename, int flags, mode_t mode) +{ + char rpath[PATH_MAX] = {0x00}; + + if (cleanpath(filename, rpath, sizeof(rpath)) == NULL) { + return -1; + } + /* + * Open the cleaned path with O_CLOEXEC added. Always forward mode: open() ignores it unless O_CREAT or O_TMPFILE is + * set, and omitting it for O_CREAT with mode 0 would make open() read an argument that was never passed. + */ + return open(rpath, (int)((unsigned int)flags | O_CLOEXEC), mode); +} + +FILE *lxc_fopen(const char *filename, const char *mode) +{ + char rpath[PATH_MAX] = {0x00}; + + if (cleanpath(filename, rpath, sizeof(rpath)) == NULL) { + return NULL; + } + + return fopen_cloexec(rpath, mode); +} diff --git a/src/lxc/isulad_utils.h b/src/lxc/isulad_utils.h new file mode 100644 index 000000000..7a6ab00e2 --- /dev/null +++ b/src/lxc/isulad_utils.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved + * Description: isulad utils + * Author: lifeng + * Create: 2020-04-11 +******************************************************************************/ +#ifndef __iSULAD_UTILS_H +#define __iSULAD_UTILS_H + +#include + +extern int lxc_mem_realloc(void **newptr, size_t newsize, void *oldptr, size_t oldsize); +extern void *lxc_common_calloc_s(size_t size); +extern char *safe_strdup(const char *src); + +extern int lxc_open(const char *filename, int flags, mode_t mode); +extern FILE *lxc_fopen(const char *filename, const char *mode); + +#endif diff --git a/src/lxc/json/defs.c b/src/lxc/json/defs.c new file mode 100644 index 000000000..4bf569a4e --- /dev/null +++ b/src/lxc/json/defs.c @@ -0,0 +1,205 @@ +// Generated from defs.json. Do not edit!
+#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include +#include +#include "defs.h" + +defs_hook *make_defs_hook(yajl_val tree, struct parser_context *ctx, parser_error *err) { + defs_hook *ret = NULL; + *err = 0; + if (tree == NULL) + return ret; + ret = safe_malloc(sizeof(*ret)); + { + yajl_val val = get_val(tree, "path", yajl_t_string); + if (val != NULL) { + char *str = YAJL_GET_STRING(val); + ret->path = safe_strdup(str ? str : ""); + } + } + { + yajl_val tmp = get_val(tree, "args", yajl_t_array); + if (tmp != NULL && YAJL_GET_ARRAY(tmp) != NULL && YAJL_GET_ARRAY(tmp)->len > 0) { + size_t i; + ret->args_len = YAJL_GET_ARRAY(tmp)->len; + if (YAJL_GET_ARRAY(tmp)->len > SIZE_MAX / sizeof(*ret->args) - 1) { + free_defs_hook(ret); + return NULL; + } + ret->args = safe_malloc((YAJL_GET_ARRAY(tmp)->len + 1) * sizeof(*ret->args)); + for (i = 0; i < YAJL_GET_ARRAY(tmp)->len; i++) { + yajl_val val = YAJL_GET_ARRAY(tmp)->values[i]; + if (val != NULL) { + char *str = YAJL_GET_STRING(val); + ret->args[i] = safe_strdup(str ? str : ""); + } + } + } + } + { + yajl_val tmp = get_val(tree, "env", yajl_t_array); + if (tmp != NULL && YAJL_GET_ARRAY(tmp) != NULL && YAJL_GET_ARRAY(tmp)->len > 0) { + size_t i; + ret->env_len = YAJL_GET_ARRAY(tmp)->len; + if (YAJL_GET_ARRAY(tmp)->len > SIZE_MAX / sizeof(*ret->env) - 1) { + free_defs_hook(ret); + return NULL; + } + ret->env = safe_malloc((YAJL_GET_ARRAY(tmp)->len + 1) * sizeof(*ret->env)); + for (i = 0; i < YAJL_GET_ARRAY(tmp)->len; i++) { + yajl_val val = YAJL_GET_ARRAY(tmp)->values[i]; + if (val != NULL) { + char *str = YAJL_GET_STRING(val); + ret->env[i] = safe_strdup(str ? 
str : ""); + } + } + } + } + { + yajl_val val = get_val(tree, "timeout", yajl_t_number); + if (val != NULL) { + int invalid = common_safe_int(YAJL_GET_NUMBER(val), (int *)&ret->timeout); + if (invalid) { + if (asprintf(err, "Invalid value '%s' with type 'integer' for key 'timeout': %s", YAJL_GET_NUMBER(val), strerror(-invalid)) < 0) + *err = safe_strdup("error allocating memory"); + free_defs_hook(ret); + return NULL; + } + } + } + if (ret->path == NULL) { + if (asprintf(err, "Required field '%s' not present", "path") < 0) + *err = safe_strdup("error allocating memory"); + free_defs_hook(ret); + return NULL; + } + + if (tree->type == yajl_t_object && (ctx->options & PARSE_OPTIONS_STRICT)) { + int i; + for (i = 0; i < tree->u.object.len; i++) + if (strcmp(tree->u.object.keys[i], "path") && + strcmp(tree->u.object.keys[i], "args") && + strcmp(tree->u.object.keys[i], "env") && + strcmp(tree->u.object.keys[i], "timeout")) { + if (ctx->stderr > 0) + fprintf(ctx->stderr, "WARNING: unknown key found: %s\n", tree->u.object.keys[i]); + } + } + return ret; +} + +void free_defs_hook(defs_hook *ptr) { + if (ptr == NULL) + return; + free(ptr->path); + ptr->path = NULL; + if (ptr->args != NULL) { + size_t i; + for (i = 0; i < ptr->args_len; i++) { + if (ptr->args[i] != NULL) { + free(ptr->args[i]); + ptr->args[i] = NULL; + } + } + free(ptr->args); + ptr->args = NULL; + } + if (ptr->env != NULL) { + size_t i; + for (i = 0; i < ptr->env_len; i++) { + if (ptr->env[i] != NULL) { + free(ptr->env[i]); + ptr->env[i] = NULL; + } + } + free(ptr->env); + ptr->env = NULL; + } + free(ptr); +} + +yajl_gen_status gen_defs_hook(yajl_gen g, defs_hook *ptr, struct parser_context *ctx, parser_error *err) { + yajl_gen_status stat = yajl_gen_status_ok; + *err = 0; + stat = reformat_start_map(g); + if (yajl_gen_status_ok != stat) + GEN_SET_ERROR_AND_RETURN(stat, err); + if ((ctx->options & GEN_OPTIONS_ALLKEYVALUE) ||(ptr != NULL && ptr->path != NULL)) { + char *str = ""; + stat = reformat_map_key(g, 
"path", strlen("path")); + if (yajl_gen_status_ok != stat) + GEN_SET_ERROR_AND_RETURN(stat, err); + if (ptr != NULL && ptr->path != NULL) { + str = ptr->path; + } + stat = reformat_string(g, str, strlen(str)); + if (yajl_gen_status_ok != stat) + GEN_SET_ERROR_AND_RETURN(stat, err); + } + if ((ctx->options & GEN_OPTIONS_ALLKEYVALUE) || (ptr != NULL && ptr->args != NULL)) { + size_t len = 0, i; + stat = reformat_map_key(g, "args", strlen("args")); + if (yajl_gen_status_ok != stat) + GEN_SET_ERROR_AND_RETURN(stat, err); + if (ptr != NULL && ptr->args != NULL) { + len = ptr->args_len; + } + if (!len && !(ctx->options & GEN_OPTIONS_SIMPLIFY)) + yajl_gen_config(g, yajl_gen_beautify, 0); + stat = reformat_start_array(g); + if (yajl_gen_status_ok != stat) + GEN_SET_ERROR_AND_RETURN(stat, err); + for (i = 0; i < len; i++) { + stat = reformat_string(g, ptr->args[i], strlen(ptr->args[i])); + if (yajl_gen_status_ok != stat) + GEN_SET_ERROR_AND_RETURN(stat, err); + } + stat = reformat_end_array(g); + if (yajl_gen_status_ok != stat) + GEN_SET_ERROR_AND_RETURN(stat, err); + if (!len && !(ctx->options & GEN_OPTIONS_SIMPLIFY)) + yajl_gen_config(g, yajl_gen_beautify, 1); + } + if ((ctx->options & GEN_OPTIONS_ALLKEYVALUE) || (ptr != NULL && ptr->env != NULL)) { + size_t len = 0, i; + stat = reformat_map_key(g, "env", strlen("env")); + if (yajl_gen_status_ok != stat) + GEN_SET_ERROR_AND_RETURN(stat, err); + if (ptr != NULL && ptr->env != NULL) { + len = ptr->env_len; + } + if (!len && !(ctx->options & GEN_OPTIONS_SIMPLIFY)) + yajl_gen_config(g, yajl_gen_beautify, 0); + stat = reformat_start_array(g); + if (yajl_gen_status_ok != stat) + GEN_SET_ERROR_AND_RETURN(stat, err); + for (i = 0; i < len; i++) { + stat = reformat_string(g, ptr->env[i], strlen(ptr->env[i])); + if (yajl_gen_status_ok != stat) + GEN_SET_ERROR_AND_RETURN(stat, err); + } + stat = reformat_end_array(g); + if (yajl_gen_status_ok != stat) + GEN_SET_ERROR_AND_RETURN(stat, err); + if (!len && !(ctx->options & 
GEN_OPTIONS_SIMPLIFY)) + yajl_gen_config(g, yajl_gen_beautify, 1); + } + if ((ctx->options & GEN_OPTIONS_ALLKEYVALUE) ||(ptr != NULL && ptr->timeout)) { + long long int num = 0; + stat = reformat_map_key(g, "timeout", strlen("timeout")); + if (yajl_gen_status_ok != stat) + GEN_SET_ERROR_AND_RETURN(stat, err); + if (ptr != NULL && ptr->timeout) { + num = (long long int)ptr->timeout; + } + stat = reformat_int(g, num); + if (yajl_gen_status_ok != stat) + GEN_SET_ERROR_AND_RETURN(stat, err); + } + stat = reformat_end_map(g); + if (yajl_gen_status_ok != stat) + GEN_SET_ERROR_AND_RETURN(stat, err); + return yajl_gen_status_ok; +} diff --git a/src/lxc/json/defs.h b/src/lxc/json/defs.h new file mode 100644 index 000000000..0bbd8ac89 --- /dev/null +++ b/src/lxc/json/defs.h @@ -0,0 +1,37 @@ +// Generated from defs.json. Do not edit! +#ifndef DEFS_SCHEMA_H +#define DEFS_SCHEMA_H + +#include +#include +#include "json_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + char *path; + + char **args; + size_t args_len; + + char **env; + size_t env_len; + + int timeout; + +} +defs_hook; + +void free_defs_hook(defs_hook *ptr); + +defs_hook *make_defs_hook(yajl_val tree, struct parser_context *ctx, parser_error *err); + +yajl_gen_status gen_defs_hook(yajl_gen g, defs_hook *ptr, struct parser_context *ctx, parser_error *err); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/lxc/json/json_common.c b/src/lxc/json/json_common.c new file mode 100755 index 000000000..ec20c5982 --- /dev/null +++ b/src/lxc/json/json_common.c @@ -0,0 +1,1153 @@ +// Auto generated file. Do not edit! 
+#define _GNU_SOURCE
+#include
+#include
+#include
+#include "json_common.h"
+
+/* Buffer size used when formatting a 64-bit integer as decimal text. */
+#define MAX_NUM_STR_LEN 21
+
+/*
+ * Report whether numstr starts with '-' once the leading whitespace that
+ * strtoul()/strtoull() skip has been passed. Those functions negate such
+ * input instead of failing, so the unsigned parsers below use this to reject
+ * negative values explicitly.
+ */
+static bool has_minus_sign(const char *numstr) {
+    return numstr[strspn(numstr, " \t\n\v\f\r")] == '-';
+}
+
+/* Emit the len bytes at str as a JSON number through the yajl generator ctx. */
+yajl_gen_status reformat_number(void *ctx, const char *str, size_t len) {
+    yajl_gen g = (yajl_gen) ctx;
+    return yajl_gen_number(g, str, len);
+}
+
+/*
+ * Format num as decimal text and emit it as a JSON number.
+ * Returns yajl_gen_in_error_state when the text does not fit the buffer.
+ */
+yajl_gen_status reformat_uint(void *ctx, long long unsigned int num) {
+    char numstr[MAX_NUM_STR_LEN];
+    int ret;
+
+    ret = snprintf(numstr, MAX_NUM_STR_LEN, "%llu", num);
+    if (ret < 0 || ret >= MAX_NUM_STR_LEN) {
+        return yajl_gen_in_error_state;
+    }
+    return reformat_number(ctx, (const char *)numstr, strlen(numstr));
+}
+
+/* Signed counterpart of reformat_uint(). */
+yajl_gen_status reformat_int(void *ctx, long long int num) {
+    char numstr[MAX_NUM_STR_LEN];
+    int ret;
+
+    ret = snprintf(numstr, MAX_NUM_STR_LEN, "%lld", num);
+    if (ret < 0 || ret >= MAX_NUM_STR_LEN) {
+        return yajl_gen_in_error_state;
+    }
+    return reformat_number(ctx, (const char *)numstr, strlen(numstr));
+}
+
+/* Emit num through yajl_gen_double(). */
+yajl_gen_status reformat_double(void *ctx, double num) {
+    yajl_gen g = (yajl_gen) ctx;
+    return yajl_gen_double(g, num);
+}
+
+/* Emit the len bytes at str through yajl_gen_string(). */
+yajl_gen_status reformat_string(void *ctx, const char *str, size_t len) {
+    yajl_gen g = (yajl_gen) ctx;
+    return yajl_gen_string(g, (const unsigned char *)str, len);
+}
+
+/* Emit a JSON null. */
+yajl_gen_status reformat_null(void *ctx) {
+    yajl_gen g = (yajl_gen) ctx;
+    return yajl_gen_null(g);
+}
+
+/* Emit a JSON boolean. */
+yajl_gen_status reformat_bool(void *ctx, int boolean) {
+    yajl_gen g = (yajl_gen) ctx;
+    return yajl_gen_bool(g, boolean);
+}
+
+/* Emit a map key; keys are written with yajl_gen_string() like any string. */
+yajl_gen_status reformat_map_key(void *ctx, const char *str, size_t len) {
+    yajl_gen g = (yajl_gen) ctx;
+    return yajl_gen_string(g, (const unsigned char *)str, len);
+}
+
+/* Open a JSON map. */
+yajl_gen_status reformat_start_map(void *ctx) {
+    yajl_gen g = (yajl_gen) ctx;
+    return yajl_gen_map_open(g);
+}
+
+/* Close a JSON map. */
+yajl_gen_status reformat_end_map(void *ctx) {
+    yajl_gen g = (yajl_gen) ctx;
+    return yajl_gen_map_close(g);
+}
+
+/* Open a JSON array. */
+yajl_gen_status reformat_start_array(void *ctx) {
+    yajl_gen g = (yajl_gen) ctx;
+    return yajl_gen_array_open(g);
+}
+
+/* Close a JSON array. */
+yajl_gen_status reformat_end_array(void *ctx) {
+    yajl_gen g = (yajl_gen) ctx;
+    return yajl_gen_array_close(g);
+}
+
+/*
+ * Allocate a yajl generator into *g and configure it from ctx->options:
+ * beautify is on unless GEN_OPTIONS_SIMPLIFY is set, UTF-8 validation is on
+ * unless GEN_OPTIONS_NOT_VALIDATE_UTF8 is set.
+ * Returns false when the generator cannot be allocated.
+ */
+bool json_gen_init(yajl_gen *g, struct parser_context *ctx) {
+    *g = yajl_gen_alloc(NULL);
+    if (NULL == *g) {
+        return false;
+    }
+    yajl_gen_config(*g, yajl_gen_beautify, !(ctx->options & GEN_OPTIONS_SIMPLIFY));
+    yajl_gen_config(*g, yajl_gen_validate_utf8, !(ctx->options & GEN_OPTIONS_NOT_VALIDATE_UTF8));
+    return true;
+}
+
+/* Look up the direct child called name with the requested type in tree. */
+yajl_val get_val(yajl_val tree, const char *name, yajl_type type) {
+    const char *path[] = { name, NULL };
+    return yajl_tree_get(tree, path, type);
+}
+
+/*
+ * Return size bytes of zeroed memory. Aborts the process when size is 0 or
+ * when the allocation fails, so the result is never NULL.
+ */
+void *safe_malloc(size_t size) {
+    void *ret = NULL;
+    if (size == 0) {
+        abort();
+    }
+    ret = calloc(1, size);
+    if (ret == NULL) {
+        abort();
+    }
+    return ret;
+}
+
+/*
+ * common_safe_<type>(): convert the whole of numstr to <type>.
+ *
+ * Each returns 0 and stores the value in *converted on success. On failure
+ * *converted is left untouched and the result is -EINVAL (NULL input, no
+ * digits, or trailing characters), -ERANGE (value does not fit the type) or
+ * -errno as set by the underlying strto*() call.
+ */
+int common_safe_double(const char *numstr, double *converted) {
+    char *err_str = NULL;
+    double d;
+
+    if (numstr == NULL) {
+        return -EINVAL;
+    }
+
+    errno = 0;
+    d = strtod(numstr, &err_str);
+    if (errno > 0) {
+        return -errno;
+    }
+
+    if (err_str == NULL || err_str == numstr || *err_str != '\0') {
+        return -EINVAL;
+    }
+
+    *converted = d;
+    return 0;
+}
+
+int common_safe_uint8(const char *numstr, uint8_t *converted) {
+    char *err = NULL;
+    unsigned long int uli;
+
+    if (numstr == NULL) {
+        return -EINVAL;
+    }
+
+    errno = 0;
+    uli = strtoul(numstr, &err, 0);
+    if (errno > 0) {
+        return -errno;
+    }
+
+    if (err == NULL || err == numstr || *err != '\0') {
+        return -EINVAL;
+    }
+
+    /* strtoul() wraps negative input around; only "-0" is representable. */
+    if (uli != 0 && has_minus_sign(numstr)) {
+        return -ERANGE;
+    }
+
+    if (uli > UINT8_MAX) {
+        return -ERANGE;
+    }
+
+    *converted = (uint8_t)uli;
+    return 0;
+}
+
+int common_safe_uint16(const char *numstr, uint16_t *converted) {
+    char *err = NULL;
+    unsigned long int uli;
+
+    if (numstr == NULL) {
+        return -EINVAL;
+    }
+
+    errno = 0;
+    uli = strtoul(numstr, &err, 0);
+    if (errno > 0) {
+        return -errno;
+    }
+
+    if (err == NULL || err == numstr || *err != '\0') {
+        return -EINVAL;
+    }
+
+    /* strtoul() wraps negative input around; only "-0" is representable. */
+    if (uli != 0 && has_minus_sign(numstr)) {
+        return -ERANGE;
+    }
+
+    if (uli > UINT16_MAX) {
+        return -ERANGE;
+    }
+
+    *converted = (uint16_t)uli;
+    return 0;
+}
+
+int common_safe_uint32(const char *numstr, uint32_t *converted) {
+    char *err = NULL;
+    unsigned long long int ull;
+
+    if (numstr == NULL) {
+        return -EINVAL;
+    }
+
+    errno = 0;
+    ull = strtoull(numstr, &err, 0);
+    if (errno > 0) {
+        return -errno;
+    }
+
+    if (err == NULL || err == numstr || *err != '\0') {
+        return -EINVAL;
+    }
+
+    /* strtoull() wraps negative input around; only "-0" is representable. */
+    if (ull != 0 && has_minus_sign(numstr)) {
+        return -ERANGE;
+    }
+
+    if (ull > UINT32_MAX) {
+        return -ERANGE;
+    }
+
+    *converted = (uint32_t)ull;
+    return 0;
+}
+
+int common_safe_uint64(const char *numstr, uint64_t *converted) {
+    char *err = NULL;
+    unsigned long long int ull;
+
+    if (numstr == NULL) {
+        return -EINVAL;
+    }
+
+    errno = 0;
+    ull = strtoull(numstr, &err, 0);
+    if (errno > 0) {
+        return -errno;
+    }
+
+    if (err == NULL || err == numstr || *err != '\0') {
+        return -EINVAL;
+    }
+
+    /* strtoull() wraps negative input around; only "-0" is representable. */
+    if (ull != 0 && has_minus_sign(numstr)) {
+        return -ERANGE;
+    }
+
+    *converted = (uint64_t)ull;
+    return 0;
+}
+
+int common_safe_uint(const char *numstr, unsigned int *converted) {
+    char *err = NULL;
+    unsigned long long int ull;
+
+    if (numstr == NULL) {
+        return -EINVAL;
+    }
+
+    errno = 0;
+    ull = strtoull(numstr, &err, 0);
+    if (errno > 0) {
+        return -errno;
+    }
+
+    if (err == NULL || err == numstr || *err != '\0') {
+        return -EINVAL;
+    }
+
+    /* strtoull() wraps negative input around; only "-0" is representable. */
+    if (ull != 0 && has_minus_sign(numstr)) {
+        return -ERANGE;
+    }
+
+    if (ull > UINT_MAX) {
+        return -ERANGE;
+    }
+
+    *converted = (unsigned int)ull;
+    return 0;
+}
+
+int common_safe_int8(const char *numstr, int8_t *converted) {
+    char *err = NULL;
+    long int li;
+
+    if (numstr == NULL) {
+        return -EINVAL;
+    }
+
+    errno = 0;
+    li = strtol(numstr, &err, 0);
+    if (errno > 0) {
+        return -errno;
+    }
+
+    if (err == NULL || err == numstr || *err != '\0') {
+        return -EINVAL;
+    }
+
+    if (li > INT8_MAX || li < INT8_MIN) {
+        return -ERANGE;
+    }
+
+    *converted = (int8_t)li;
+    return 0;
+}
+
+int common_safe_int16(const char *numstr, int16_t *converted) {
+    char *err = NULL;
+    long int li;
+
+    if (numstr == NULL) {
+        return -EINVAL;
+    }
+
+    errno = 0;
+    li = strtol(numstr, &err, 0);
+    if (errno > 0) {
+        return -errno;
+    }
+
+    if (err == NULL || err == numstr || *err != '\0') {
+        return -EINVAL;
+    }
+
+    if (li > INT16_MAX || li < INT16_MIN) {
+        return -ERANGE;
+    }
+
+    *converted = (int16_t)li;
+    return 0;
+}
+
+int common_safe_int32(const char *numstr, int32_t *converted) {
+    char *err = NULL;
+    long long int lli;
+
+    if (numstr == NULL) {
+        return -EINVAL;
+    }
+
+    errno = 0;
+    /* Parse at the width of lli so the range check below sees the real value. */
+    lli = strtoll(numstr, &err, 0);
+    if (errno > 0) {
+        return -errno;
+    }
+
+    if (err == NULL || err == numstr || *err != '\0') {
+        return -EINVAL;
+    }
+
+    if (lli > INT32_MAX || lli < INT32_MIN) {
+        return -ERANGE;
+    }
+
+    *converted = (int32_t)lli;
+    return 0;
+}
+
+int common_safe_int64(const char *numstr, int64_t *converted) {
+    char *err = NULL;
+    long long int lli;
+
+    if (numstr == NULL) {
+        return -EINVAL;
+    }
+
+    errno = 0;
+    lli = strtoll(numstr, &err, 0);
+    if (errno > 0) {
+        return -errno;
+    }
+
+    if (err == NULL || err == numstr || *err != '\0') {
+        return -EINVAL;
+    }
+
+    *converted = (int64_t)lli;
+    return 0;
+}
+
+int common_safe_int(const char *numstr, int *converted) {
+    char *err = NULL;
+    long long int lli;
+
+    if (numstr == NULL) {
+        return -EINVAL;
+    }
+
+    errno = 0;
+    /* Parse at the width of lli so the range check below sees the real value. */
+    lli = strtoll(numstr, &err, 0);
+    if (errno > 0) {
+        return -errno;
+    }
+
+    if (err == NULL || err == numstr || *err != '\0') {
+        return -EINVAL;
+    }
+
+    if (lli > INT_MAX || lli < INT_MIN) {
+        return -ERANGE;
+    }
+
+    *converted = (int)lli;
+    return 0;
+}
+
+/*
+ * json_map_<K>_<V> helpers.
+ *
+ * gen_*()    writes the map as a JSON object; a NULL map is written as an
+ *            empty object. Beautify is switched off while an empty object is
+ *            written (unless GEN_OPTIONS_SIMPLIFY is set) and switched back
+ *            on afterwards. Integer keys are written as decimal strings.
+ * free_*()   releases the map, its arrays and any strings it owns.
+ * make_*()   builds a map from a yajl object; returns NULL when src is not an
+ *            object, when the entry count would overflow, or (with *err set)
+ *            when a key or value has the wrong type.
+ * append_*() adds one entry by reallocating both arrays; returns 0 on
+ *            success and -1 on a NULL map or size overflow. String arguments
+ *            are copied, with NULL stored as "".
+ */
+yajl_gen_status gen_json_map_int_int(void *ctx, json_map_int_int *map, struct parser_context *ptx, parser_error *err) {
+    yajl_gen_status stat = yajl_gen_status_ok;
+    yajl_gen g = (yajl_gen) ctx;
+    size_t len = 0, i = 0;
+    if (map != NULL) {
+        len = map->len;
+    }
+    if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) {
+        yajl_gen_config(g, yajl_gen_beautify, 0);
+    }
+    stat = reformat_start_map(g);
+    if (yajl_gen_status_ok != stat) {
+        GEN_SET_ERROR_AND_RETURN(stat, err);
+    }
+    for (i = 0; i < len; i++) {
+        char numstr[MAX_NUM_STR_LEN];
+        int nret;
+        nret = snprintf(numstr, MAX_NUM_STR_LEN, "%lld", (long long int)map->keys[i]);
+        if (nret < 0 || nret >= MAX_NUM_STR_LEN) {
+            if (!*err && asprintf(err, "Error to print string") < 0) {
+                *(err) = safe_strdup("error allocating memory");
+            }
+            return yajl_gen_in_error_state;
+        }
+        stat = reformat_string(g, numstr, strlen(numstr));
+        if (yajl_gen_status_ok != stat) {
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+        }
+        stat = reformat_int(g, map->values[i]);
+        if (yajl_gen_status_ok != stat) {
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+        }
+    }
+
+    stat = reformat_end_map(g);
+    if (yajl_gen_status_ok != stat) {
+        GEN_SET_ERROR_AND_RETURN(stat, err);
+    }
+    if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) {
+        yajl_gen_config(g, yajl_gen_beautify, 1);
+    }
+    return yajl_gen_status_ok;
+}
+
+void free_json_map_int_int(json_map_int_int *map) {
+    if (map != NULL) {
+        /* Keys and values are plain ints: only the arrays need freeing. */
+        free(map->keys);
+        map->keys = NULL;
+        free(map->values);
+        map->values = NULL;
+        free(map);
+    }
+}
+json_map_int_int *make_json_map_int_int(yajl_val src, struct parser_context *ctx, parser_error *err) {
+    json_map_int_int *ret = NULL;
+    if (src != NULL && YAJL_GET_OBJECT(src) != NULL) {
+        size_t i;
+        size_t len = YAJL_GET_OBJECT(src)->len;
+        if (len > SIZE_MAX / sizeof(int) - 1) {
+            return NULL;
+        }
+        ret = safe_malloc(sizeof(*ret));
+        ret->len = len;
+        ret->keys = safe_malloc((len + 1) * sizeof(int));
+        ret->values = safe_malloc((len + 1) * sizeof(int));
+        for (i = 0; i < len; i++) {
+            const char *srckey = YAJL_GET_OBJECT(src)->keys[i];
+            yajl_val srcval = YAJL_GET_OBJECT(src)->values[i];
+
+            if (srckey != NULL) {
+                int invalid;
+                invalid = common_safe_int(srckey, &(ret->keys[i]));
+                if (invalid) {
+                    if (*err == NULL && asprintf(err, "Invalid key '%s' with type 'int': %s", srckey, strerror(-invalid)) < 0) {
+                        *(err) = safe_strdup("error allocating memory");
+                    }
+                    free_json_map_int_int(ret);
+                    return NULL;
+                }
+            }
+
+            if (srcval != NULL) {
+                int invalid;
+                if (!YAJL_IS_NUMBER(srcval)) {
+                    if (*err == NULL && asprintf(err, "Invalid value with type 'int' for key '%s'", srckey) < 0) {
+                        *(err) = safe_strdup("error allocating memory");
+                    }
+                    free_json_map_int_int(ret);
+                    return NULL;
+                }
+                invalid = common_safe_int(YAJL_GET_NUMBER(srcval), &(ret->values[i]));
+                if (invalid) {
+                    if (*err == NULL && asprintf(err, "Invalid value with type 'int' for key '%s': %s", srckey, strerror(-invalid)) < 0) {
+                        *(err) = safe_strdup("error allocating memory");
+                    }
+                    free_json_map_int_int(ret);
+                    return NULL;
+                }
+            }
+        }
+    }
+    return ret;
+}
+int append_json_map_int_int(json_map_int_int *map, int key, int val) {
+    size_t len;
+    int *keys = NULL;
+    int *vals = NULL;
+
+    if (map == NULL) {
+        return -1;
+    }
+
+    if ((SIZE_MAX / sizeof(int) - 1) < map->len) {
+        return -1;
+    }
+
+    len = map->len + 1;
+    keys = safe_malloc(len * sizeof(int));
+    vals = safe_malloc(len * sizeof(int));
+
+    if (map->len) {
+        (void)memcpy(keys, map->keys, map->len * sizeof(int));
+        (void)memcpy(vals, map->values, map->len * sizeof(int));
+    }
+    free(map->keys);
+    map->keys = keys;
+    free(map->values);
+    map->values = vals;
+    map->keys[map->len] = key;
+    map->values[map->len] = val;
+
+    map->len++;
+    return 0;
+}
+
+yajl_gen_status gen_json_map_int_bool(void *ctx, json_map_int_bool *map, struct parser_context *ptx, parser_error *err) {
+    yajl_gen_status stat = yajl_gen_status_ok;
+    yajl_gen g = (yajl_gen) ctx;
+    size_t len = 0, i = 0;
+    if (map != NULL) {
+        len = map->len;
+    }
+    if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) {
+        yajl_gen_config(g, yajl_gen_beautify, 0);
+    }
+    stat = reformat_start_map(g);
+    if (yajl_gen_status_ok != stat) {
+        GEN_SET_ERROR_AND_RETURN(stat, err);
+    }
+    for (i = 0; i < len; i++) {
+        char numstr[MAX_NUM_STR_LEN];
+        int nret;
+        nret = snprintf(numstr, MAX_NUM_STR_LEN, "%lld", (long long int)map->keys[i]);
+        if (nret < 0 || nret >= MAX_NUM_STR_LEN) {
+            if (!*err && asprintf(err, "Error to print string") < 0) {
+                *(err) = safe_strdup("error allocating memory");
+            }
+            return yajl_gen_in_error_state;
+        }
+        stat = reformat_string(g, numstr, strlen(numstr));
+        if (yajl_gen_status_ok != stat) {
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+        }
+        stat = reformat_bool(g, map->values[i]);
+        if (yajl_gen_status_ok != stat) {
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+        }
+    }
+
+    stat = reformat_end_map(g);
+    if (yajl_gen_status_ok != stat) {
+        GEN_SET_ERROR_AND_RETURN(stat, err);
+    }
+    if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) {
+        yajl_gen_config(g, yajl_gen_beautify, 1);
+    }
+    return yajl_gen_status_ok;
+}
+
+void free_json_map_int_bool(json_map_int_bool *map) {
+    if (map != NULL) {
+        free(map->keys);
+        map->keys = NULL;
+        free(map->values);
+        map->values = NULL;
+        free(map);
+    }
+}
+json_map_int_bool *make_json_map_int_bool(yajl_val src, struct parser_context *ctx, parser_error *err) {
+    json_map_int_bool *ret = NULL;
+    if (src != NULL && YAJL_GET_OBJECT(src) != NULL) {
+        size_t i;
+        size_t len = YAJL_GET_OBJECT(src)->len;
+        if (len > SIZE_MAX / sizeof(int) - 1) {
+            return NULL;
+        }
+        ret = safe_malloc(sizeof(*ret));
+        ret->len = len;
+        ret->keys = safe_malloc((len + 1) * sizeof(int));
+        ret->values = safe_malloc((len + 1) * sizeof(bool));
+        for (i = 0; i < len; i++) {
+            const char *srckey = YAJL_GET_OBJECT(src)->keys[i];
+            yajl_val srcval = YAJL_GET_OBJECT(src)->values[i];
+
+            if (srckey != NULL) {
+                int invalid;
+                invalid = common_safe_int(srckey, &(ret->keys[i]));
+                if (invalid) {
+                    if (*err == NULL && asprintf(err, "Invalid key '%s' with type 'int': %s", srckey, strerror(-invalid)) < 0) {
+                        *(err) = safe_strdup("error allocating memory");
+                    }
+                    free_json_map_int_bool(ret);
+                    return NULL;
+                }
+            }
+
+            if (srcval != NULL) {
+                if (YAJL_IS_TRUE(srcval)) {
+                    ret->values[i] = true;
+                } else if (YAJL_IS_FALSE(srcval)) {
+                    ret->values[i] = false;
+                } else {
+                    if (*err == NULL && asprintf(err, "Invalid value with type 'bool' for key '%s'", srckey) < 0) {
+                        *(err) = safe_strdup("error allocating memory");
+                    }
+                    free_json_map_int_bool(ret);
+                    return NULL;
+                }
+            }
+        }
+    }
+    return ret;
+}
+int append_json_map_int_bool(json_map_int_bool *map, int key, bool val) {
+    size_t len;
+    int *keys = NULL;
+    bool *vals = NULL;
+
+    if (map == NULL) {
+        return -1;
+    }
+
+    if ((SIZE_MAX / sizeof(int) - 1) < map->len || (SIZE_MAX / sizeof(bool) - 1) < map->len) {
+        return -1;
+    }
+
+    len = map->len + 1;
+    keys = safe_malloc(len * sizeof(int));
+    vals = safe_malloc(len * sizeof(bool));
+
+    if (map->len) {
+        (void)memcpy(keys, map->keys, map->len * sizeof(int));
+        (void)memcpy(vals, map->values, map->len * sizeof(bool));
+    }
+    free(map->keys);
+    map->keys = keys;
+    free(map->values);
+    map->values = vals;
+    map->keys[map->len] = key;
+    map->values[map->len] = val;
+
+    map->len++;
+    return 0;
+}
+
+yajl_gen_status gen_json_map_int_string(void *ctx, json_map_int_string *map, struct parser_context *ptx, parser_error *err) {
+    yajl_gen_status stat = yajl_gen_status_ok;
+    yajl_gen g = (yajl_gen) ctx;
+    size_t len = 0, i = 0;
+    if (map != NULL) {
+        len = map->len;
+    }
+    if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) {
+        yajl_gen_config(g, yajl_gen_beautify, 0);
+    }
+    stat = reformat_start_map(g);
+    if (yajl_gen_status_ok != stat) {
+        GEN_SET_ERROR_AND_RETURN(stat, err);
+    }
+    for (i = 0; i < len; i++) {
+        char numstr[MAX_NUM_STR_LEN];
+        /* A slot the parser never filled is NULL; write it as "". */
+        const char *value = map->values[i] != NULL ? map->values[i] : "";
+        int nret;
+        nret = snprintf(numstr, MAX_NUM_STR_LEN, "%lld", (long long int)map->keys[i]);
+        if (nret < 0 || nret >= MAX_NUM_STR_LEN) {
+            if (!*err && asprintf(err, "Error to print string") < 0) {
+                *(err) = safe_strdup("error allocating memory");
+            }
+            return yajl_gen_in_error_state;
+        }
+        stat = reformat_string(g, numstr, strlen(numstr));
+        if (yajl_gen_status_ok != stat) {
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+        }
+        stat = reformat_string(g, value, strlen(value));
+        if (yajl_gen_status_ok != stat) {
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+        }
+    }
+
+    stat = reformat_end_map(g);
+    if (yajl_gen_status_ok != stat) {
+        GEN_SET_ERROR_AND_RETURN(stat, err);
+    }
+    if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) {
+        yajl_gen_config(g, yajl_gen_beautify, 1);
+    }
+    return yajl_gen_status_ok;
+}
+
+void free_json_map_int_string(json_map_int_string *map) {
+    if (map != NULL) {
+        size_t i;
+        for (i = 0; i < map->len; i++) {
+            // No need to free key for type int
+            free(map->values[i]);
+            map->values[i] = NULL;
+        }
+        free(map->keys);
+        map->keys = NULL;
+        free(map->values);
+        map->values = NULL;
+        free(map);
+    }
+}
+json_map_int_string *make_json_map_int_string(yajl_val src, struct parser_context *ctx, parser_error *err) {
+    json_map_int_string *ret = NULL;
+    if (src != NULL && YAJL_GET_OBJECT(src) != NULL) {
+        size_t i;
+        size_t len = YAJL_GET_OBJECT(src)->len;
+        if (len > SIZE_MAX / sizeof(char *) - 1) {
+            return NULL;
+        }
+        ret = safe_malloc(sizeof(*ret));
+        ret->len = len;
+        ret->keys = safe_malloc((len + 1) * sizeof(int));
+        ret->values = safe_malloc((len + 1) * sizeof(char *));
+        for (i = 0; i < len; i++) {
+            const char *srckey = YAJL_GET_OBJECT(src)->keys[i];
+            yajl_val srcval = YAJL_GET_OBJECT(src)->values[i];
+
+            if (srckey != NULL) {
+                int invalid;
+                invalid = common_safe_int(srckey, &(ret->keys[i]));
+                if (invalid) {
+                    if (*err == NULL && asprintf(err, "Invalid key '%s' with type 'int': %s", srckey, strerror(-invalid)) < 0) {
+                        *(err) = safe_strdup("error allocating memory");
+                    }
+                    free_json_map_int_string(ret);
+                    return NULL;
+                }
+            }
+
+            if (srcval != NULL) {
+                if (!YAJL_IS_STRING(srcval)) {
+                    if (*err == NULL && asprintf(err, "Invalid value with type 'string' for key '%s'", srckey) < 0) {
+                        *(err) = safe_strdup("error allocating memory");
+                    }
+                    free_json_map_int_string(ret);
+                    return NULL;
+                }
+                char *str = YAJL_GET_STRING(srcval);
+                ret->values[i] = safe_strdup(str ? str : "");
+            }
+        }
+    }
+    return ret;
+}
+int append_json_map_int_string(json_map_int_string *map, int key, const char *val) {
+    size_t len;
+    int *keys = NULL;
+    char **vals = NULL;
+
+    if (map == NULL) {
+        return -1;
+    }
+
+    if ((SIZE_MAX / sizeof(int) - 1) < map->len || (SIZE_MAX / sizeof(char *) - 1) < map->len) {
+        return -1;
+    }
+
+    len = map->len + 1;
+    keys = safe_malloc(len * sizeof(int));
+    vals = safe_malloc(len * sizeof(char *));
+
+    if (map->len) {
+        (void)memcpy(keys, map->keys, map->len * sizeof(int));
+        (void)memcpy(vals, map->values, map->len * sizeof(char *));
+    }
+    free(map->keys);
+    map->keys = keys;
+    free(map->values);
+    map->values = vals;
+    map->keys[map->len] = key;
+    map->values[map->len] = safe_strdup(val ? val : "");
+
+    map->len++;
+    return 0;
+}
+
+yajl_gen_status gen_json_map_string_int(void *ctx, json_map_string_int *map, struct parser_context *ptx, parser_error *err) {
+    yajl_gen_status stat = yajl_gen_status_ok;
+    yajl_gen g = (yajl_gen) ctx;
+    size_t len = 0, i = 0;
+    if (map != NULL) {
+        len = map->len;
+    }
+    if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) {
+        yajl_gen_config(g, yajl_gen_beautify, 0);
+    }
+    stat = reformat_start_map(g);
+    if (yajl_gen_status_ok != stat) {
+        GEN_SET_ERROR_AND_RETURN(stat, err);
+    }
+    for (i = 0; i < len; i++) {
+        stat = reformat_string(g, map->keys[i], strlen(map->keys[i]));
+        if (yajl_gen_status_ok != stat) {
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+        }
+        stat = reformat_int(g, map->values[i]);
+        if (yajl_gen_status_ok != stat) {
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+        }
+    }
+
+    stat = reformat_end_map(g);
+    if (yajl_gen_status_ok != stat) {
+        GEN_SET_ERROR_AND_RETURN(stat, err);
+    }
+    if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) {
+        yajl_gen_config(g, yajl_gen_beautify, 1);
+    }
+    return yajl_gen_status_ok;
+}
+
+void free_json_map_string_int(json_map_string_int *map) {
+    if (map != NULL) {
+        size_t i;
+        for (i = 0; i < map->len; i++) {
+            free(map->keys[i]);
+            map->keys[i] = NULL;
+            // No need to free value for type int
+        }
+        free(map->keys);
+        map->keys = NULL;
+        free(map->values);
+        map->values = NULL;
+        free(map);
+    }
+}
+json_map_string_int *make_json_map_string_int(yajl_val src, struct parser_context *ctx, parser_error *err) {
+    json_map_string_int *ret = NULL;
+    if (src != NULL && YAJL_GET_OBJECT(src) != NULL) {
+        size_t i;
+        size_t len = YAJL_GET_OBJECT(src)->len;
+        if (len > SIZE_MAX / sizeof(char *) - 1) {
+            return NULL;
+        }
+        ret = safe_malloc(sizeof(*ret));
+        ret->len = len;
+        ret->keys = safe_malloc((len + 1) * sizeof(char *));
+        ret->values = safe_malloc((len + 1) * sizeof(int));
+        for (i = 0; i < len; i++) {
+            const char *srckey = YAJL_GET_OBJECT(src)->keys[i];
+            yajl_val srcval = YAJL_GET_OBJECT(src)->values[i];
+            ret->keys[i] = safe_strdup(srckey ? srckey : "");
+
+            if (srcval != NULL) {
+                int invalid;
+                if (!YAJL_IS_NUMBER(srcval)) {
+                    if (*err == NULL && asprintf(err, "Invalid value with type 'int' for key '%s'", srckey) < 0) {
+                        *(err) = safe_strdup("error allocating memory");
+                    }
+                    free_json_map_string_int(ret);
+                    return NULL;
+                }
+                invalid = common_safe_int(YAJL_GET_NUMBER(srcval), &(ret->values[i]));
+                if (invalid) {
+                    if (*err == NULL && asprintf(err, "Invalid value with type 'int' for key '%s': %s", srckey, strerror(-invalid)) < 0) {
+                        *(err) = safe_strdup("error allocating memory");
+                    }
+                    free_json_map_string_int(ret);
+                    return NULL;
+                }
+            }
+        }
+    }
+    return ret;
+}
+int append_json_map_string_int(json_map_string_int *map, const char *key, int val) {
+    size_t len;
+    char **keys = NULL;
+    int *vals = NULL;
+
+    if (map == NULL) {
+        return -1;
+    }
+
+    if ((SIZE_MAX / sizeof(char *) - 1) < map->len || (SIZE_MAX / sizeof(int) - 1) < map->len) {
+        return -1;
+    }
+
+    len = map->len + 1;
+    keys = safe_malloc(len * sizeof(char *));
+    vals = safe_malloc(len * sizeof(int));
+
+    if (map->len) {
+        (void)memcpy(keys, map->keys, map->len * sizeof(char *));
+        (void)memcpy(vals, map->values, map->len * sizeof(int));
+    }
+    free(map->keys);
+    map->keys = keys;
+    free(map->values);
+    map->values = vals;
+    map->keys[map->len] = safe_strdup(key ? key : "");
+    map->values[map->len] = val;
+
+    map->len++;
+    return 0;
+}
+
+yajl_gen_status gen_json_map_string_bool(void *ctx, json_map_string_bool *map, struct parser_context *ptx, parser_error *err) {
+    yajl_gen_status stat = yajl_gen_status_ok;
+    yajl_gen g = (yajl_gen) ctx;
+    size_t len = 0, i = 0;
+    if (map != NULL) {
+        len = map->len;
+    }
+    if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) {
+        yajl_gen_config(g, yajl_gen_beautify, 0);
+    }
+    stat = reformat_start_map(g);
+    if (yajl_gen_status_ok != stat) {
+        GEN_SET_ERROR_AND_RETURN(stat, err);
+    }
+    for (i = 0; i < len; i++) {
+        stat = reformat_string(g, map->keys[i], strlen(map->keys[i]));
+        if (yajl_gen_status_ok != stat) {
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+        }
+        stat = reformat_bool(g, map->values[i]);
+        if (yajl_gen_status_ok != stat) {
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+        }
+    }
+
+    stat = reformat_end_map(g);
+    if (yajl_gen_status_ok != stat) {
+        GEN_SET_ERROR_AND_RETURN(stat, err);
+    }
+    if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) {
+        yajl_gen_config(g, yajl_gen_beautify, 1);
+    }
+    return yajl_gen_status_ok;
+}
+
+void free_json_map_string_bool(json_map_string_bool *map) {
+    if (map != NULL) {
+        size_t i;
+        for (i = 0; i < map->len; i++) {
+            free(map->keys[i]);
+            map->keys[i] = NULL;
+            // No need to free value for type bool
+        }
+        free(map->keys);
+        map->keys = NULL;
+        free(map->values);
+        map->values = NULL;
+        free(map);
+    }
+}
+json_map_string_bool *make_json_map_string_bool(yajl_val src, struct parser_context *ctx, parser_error *err) {
+    json_map_string_bool *ret = NULL;
+    if (src != NULL && YAJL_GET_OBJECT(src) != NULL) {
+        size_t i;
+        size_t len = YAJL_GET_OBJECT(src)->len;
+        if (len > SIZE_MAX / sizeof(char *) - 1) {
+            return NULL;
+        }
+        ret = safe_malloc(sizeof(*ret));
+        ret->len = len;
+        ret->keys = safe_malloc((len + 1) * sizeof(char *));
+        ret->values = safe_malloc((len + 1) * sizeof(bool));
+        for (i = 0; i < len; i++) {
+            const char *srckey = YAJL_GET_OBJECT(src)->keys[i];
+            yajl_val srcval = YAJL_GET_OBJECT(src)->values[i];
+            ret->keys[i] = safe_strdup(srckey ? srckey : "");
+
+            if (srcval != NULL) {
+                if (YAJL_IS_TRUE(srcval)) {
+                    ret->values[i] = true;
+                } else if (YAJL_IS_FALSE(srcval)) {
+                    ret->values[i] = false;
+                } else {
+                    if (*err == NULL && asprintf(err, "Invalid value with type 'bool' for key '%s'", srckey) < 0) {
+                        *(err) = safe_strdup("error allocating memory");
+                    }
+                    free_json_map_string_bool(ret);
+                    return NULL;
+                }
+            }
+        }
+    }
+    return ret;
+}
+
+int append_json_map_string_bool(json_map_string_bool *map, const char *key, bool val) {
+    size_t len;
+    char **keys = NULL;
+    bool *vals = NULL;
+
+    if (map == NULL) {
+        return -1;
+    }
+
+    if ((SIZE_MAX / sizeof(char *) - 1) < map->len || (SIZE_MAX / sizeof(bool) - 1) < map->len) {
+        return -1;
+    }
+
+    len = map->len + 1;
+    keys = safe_malloc(len * sizeof(char *));
+    vals = safe_malloc(len * sizeof(bool));
+
+    if (map->len) {
+        (void)memcpy(keys, map->keys, map->len * sizeof(char *));
+        (void)memcpy(vals, map->values, map->len * sizeof(bool));
+    }
+    free(map->keys);
+    map->keys = keys;
+    free(map->values);
+    map->values = vals;
+    map->keys[map->len] = safe_strdup(key ? key : "");
+    map->values[map->len] = val;
+
+    map->len++;
+    return 0;
+}
+
+yajl_gen_status gen_json_map_string_string(void *ctx, json_map_string_string *map, struct parser_context *ptx, parser_error *err) {
+    yajl_gen_status stat = yajl_gen_status_ok;
+    yajl_gen g = (yajl_gen) ctx;
+    size_t len = 0, i = 0;
+    if (map != NULL) {
+        len = map->len;
+    }
+    if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) {
+        yajl_gen_config(g, yajl_gen_beautify, 0);
+    }
+    stat = reformat_start_map(g);
+    if (yajl_gen_status_ok != stat) {
+        GEN_SET_ERROR_AND_RETURN(stat, err);
+    }
+    for (i = 0; i < len; i++) {
+        /* A slot the parser never filled is NULL; write it as "". */
+        const char *value = map->values[i] != NULL ? map->values[i] : "";
+        stat = reformat_string(g, map->keys[i], strlen(map->keys[i]));
+        if (yajl_gen_status_ok != stat) {
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+        }
+        stat = reformat_string(g, value, strlen(value));
+        if (yajl_gen_status_ok != stat) {
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+        }
+    }
+
+    stat = reformat_end_map(g);
+    if (yajl_gen_status_ok != stat) {
+        GEN_SET_ERROR_AND_RETURN(stat, err);
+    }
+    if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) {
+        yajl_gen_config(g, yajl_gen_beautify, 1);
+    }
+    return yajl_gen_status_ok;
+}
+
+void free_json_map_string_string(json_map_string_string *map) {
+    if (map != NULL) {
+        size_t i;
+        for (i = 0; i < map->len; i++) {
+            free(map->keys[i]);
+            map->keys[i] = NULL;
+            free(map->values[i]);
+            map->values[i] = NULL;
+        }
+        free(map->keys);
+        map->keys = NULL;
+        free(map->values);
+        map->values = NULL;
+        free(map);
+    }
+}
+json_map_string_string *make_json_map_string_string(yajl_val src, struct parser_context *ctx, parser_error *err) {
+    json_map_string_string *ret = NULL;
+    if (src != NULL && YAJL_GET_OBJECT(src) != NULL) {
+        size_t i;
+        size_t len = YAJL_GET_OBJECT(src)->len;
+        if (len > SIZE_MAX / sizeof(char *) - 1) {
+            return NULL;
+        }
+        ret = safe_malloc(sizeof(*ret));
+        ret->len = len;
+        ret->keys = safe_malloc((len + 1) * sizeof(char *));
+        ret->values = safe_malloc((len + 1) * sizeof(char *));
+        for (i = 0; i < len; i++) {
+            const char *srckey = YAJL_GET_OBJECT(src)->keys[i];
+            yajl_val srcval = YAJL_GET_OBJECT(src)->values[i];
+            ret->keys[i] = safe_strdup(srckey ? srckey : "");
+
+            if (srcval != NULL) {
+                if (!YAJL_IS_STRING(srcval)) {
+                    if (*err == NULL && asprintf(err, "Invalid value with type 'string' for key '%s'", srckey) < 0) {
+                        *(err) = safe_strdup("error allocating memory");
+                    }
+                    free_json_map_string_string(ret);
+                    return NULL;
+                }
+                char *str = YAJL_GET_STRING(srcval);
+                ret->values[i] = safe_strdup(str ? str : "");
+            }
+        }
+    }
+    return ret;
+}
+/*
+ * Unlike the other append_*() helpers this one replaces the value when key is
+ * already present instead of adding a second entry.
+ */
+int append_json_map_string_string(json_map_string_string *map, const char *key, const char *val) {
+    size_t len, i;
+    char **keys = NULL;
+    char **vals = NULL;
+    /* NULL is stored as "" below, so look it up as "" too. */
+    const char *lookup = key ? key : "";
+
+    if (map == NULL) {
+        return -1;
+    }
+
+    for (i = 0; i < map->len; i++) {
+        if (strcmp(map->keys[i], lookup) == 0) {
+            free(map->values[i]);
+            map->values[i] = safe_strdup(val ? val : "");
+            return 0;
+        }
+    }
+
+    if ((SIZE_MAX / sizeof(char *) - 1) < map->len) {
+        return -1;
+    }
+
+    len = map->len + 1;
+    keys = safe_malloc(len * sizeof(char *));
+    vals = safe_malloc(len * sizeof(char *));
+
+    if (map->len) {
+        (void)memcpy(keys, map->keys, map->len * sizeof(char *));
+        (void)memcpy(vals, map->values, map->len * sizeof(char *));
+    }
+    free(map->keys);
+    map->keys = keys;
+    free(map->values);
+    map->values = vals;
+    map->keys[map->len] = safe_strdup(lookup);
+    map->values[map->len] = safe_strdup(val ? val : "");
+
+    map->len++;
+    return 0;
+}
diff --git a/src/lxc/json/json_common.h b/src/lxc/json/json_common.h
new file mode 100755
index 000000000..60aa5fd93
--- /dev/null
+++ b/src/lxc/json/json_common.h
@@ -0,0 +1,185 @@
+// Auto generated file. Do not edit!
+#ifndef _JSON_COMMON_H
+#define _JSON_COMMON_H
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "utils.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+# undef linux
+
+//option: report an error if an unknown key is found in the json
+# define PARSE_OPTIONS_STRICT 0x01
+//option: generate every key and value, including unset ones
+# define GEN_OPTIONS_ALLKEYVALUE 0x02
+//option: generate a simplified (non-indented) json string
+# define GEN_OPTIONS_SIMPLIFY 0x04
+//option: do not validate utf8 data
+# define GEN_OPTIONS_NOT_VALIDATE_UTF8 0x08
+
+#define GEN_SET_ERROR_AND_RETURN(stat, err) do { \
+    if (*(err) == NULL) {\
+        if (asprintf(err, "%s: %s: %d: error generating json, errcode: %d", __FILE__, __func__, __LINE__, stat) < 0) { \
+            *(err) = safe_strdup("error allocating memory"); \
+        } \
+    }\
+    return stat; \
+  } while (0) /* do/while(0) makes the macro one statement, so it is safe in an unbraced if/else */
+
+typedef char *parser_error; /* heap-allocated message string, or NULL when no error was recorded */
+
+struct parser_context {
+    unsigned int options; /* bitwise OR of the PARSE_OPTIONS_ and GEN_OPTIONS_ flags above */
+    FILE *stderr; /* stream for parser warnings; NOTE(review): the name collides with the <stdio.h> stderr macro on libcs that define it as an expression - verify portability */
+};
+
+yajl_gen_status reformat_number(void *ctx, const char *str, size_t len);
+
+yajl_gen_status reformat_uint(void *ctx, long long unsigned int num);
+
+yajl_gen_status reformat_int(void *ctx, long long int num);
+
+yajl_gen_status reformat_double(void *ctx, double num);
+
+yajl_gen_status reformat_string(void *ctx, const char *str, size_t len);
+
+yajl_gen_status reformat_null(void *ctx);
+
+yajl_gen_status reformat_bool(void *ctx, int boolean);
+
+yajl_gen_status reformat_map_key(void *ctx, const char *str, size_t len);
+
+yajl_gen_status reformat_start_map(void *ctx);
+
+yajl_gen_status reformat_end_map(void *ctx);
+
+yajl_gen_status reformat_start_array(void *ctx);
+
+yajl_gen_status reformat_end_array(void *ctx);
+
+bool json_gen_init(yajl_gen *g, struct parser_context *ctx);
+
+yajl_val get_val(yajl_val tree, const char *name, yajl_type type);
+
+void *safe_malloc(size_t size);
+
+int common_safe_double(const char *numstr, double *converted);
+
+int common_safe_uint8(const char *numstr, uint8_t *converted);
+
+int common_safe_uint16(const char *numstr, uint16_t *converted);
+
+int common_safe_uint32(const char *numstr, uint32_t *converted);
+
+int common_safe_uint64(const char *numstr, uint64_t *converted);
+
+int common_safe_uint(const char *numstr, unsigned int *converted);
+
+int common_safe_int8(const char *numstr, int8_t *converted);
+
+int common_safe_int16(const char *numstr, int16_t *converted);
+
+int common_safe_int32(const char *numstr, int32_t *converted);
+
+int common_safe_int64(const char *numstr, int64_t *converted);
+
+int common_safe_int(const char *numstr, int *converted);
+
+typedef struct {
+    int *keys;
+    int *values;
+    size_t len;
+} json_map_int_int;
+
+void free_json_map_int_int(json_map_int_int *map);
+
+json_map_int_int *make_json_map_int_int(yajl_val src, struct parser_context *ctx, parser_error *err);
+
+yajl_gen_status gen_json_map_int_int(void *ctx, json_map_int_int *map, struct parser_context *ptx, parser_error *err);
+
+int append_json_map_int_int(json_map_int_int *map, int key, int val);
+
+typedef struct {
+    int *keys;
+    bool *values;
+    size_t len;
+} json_map_int_bool;
+
+void free_json_map_int_bool(json_map_int_bool *map);
+
+json_map_int_bool *make_json_map_int_bool(yajl_val src, struct parser_context *ctx, parser_error *err);
+
+yajl_gen_status gen_json_map_int_bool(void *ctx, json_map_int_bool *map, struct parser_context *ptx, parser_error *err);
+
+int append_json_map_int_bool(json_map_int_bool *map, int key, bool val);
+
+typedef struct {
+    int *keys;
+    char **values;
+    size_t len;
+} json_map_int_string;
+
+void free_json_map_int_string(json_map_int_string *map);
+
+json_map_int_string *make_json_map_int_string(yajl_val src, struct parser_context *ctx, parser_error *err);
+
+yajl_gen_status gen_json_map_int_string(void *ctx, json_map_int_string *map, struct parser_context *ptx, parser_error *err);
+
+int append_json_map_int_string(json_map_int_string *map, int key, const char *val);
+
+typedef struct {
+    char **keys;
+    int *values;
+    size_t len;
+} json_map_string_int;
+
+void free_json_map_string_int(json_map_string_int *map);
+
+json_map_string_int *make_json_map_string_int(yajl_val src, struct parser_context *ctx, parser_error *err);
+
+yajl_gen_status gen_json_map_string_int(void *ctx, json_map_string_int *map, struct parser_context *ptx, parser_error *err);
+
+int append_json_map_string_int(json_map_string_int *map, const char *key, int val);
+
+typedef struct {
+    char **keys;
+    bool *values;
+    size_t len;
+} json_map_string_bool;
+
+void free_json_map_string_bool(json_map_string_bool *map);
+
+json_map_string_bool *make_json_map_string_bool(yajl_val src, struct parser_context *ctx, parser_error *err);
+
+yajl_gen_status gen_json_map_string_bool(void *ctx, json_map_string_bool *map, struct parser_context *ptx, parser_error *err);
+
+int append_json_map_string_bool(json_map_string_bool *map, const char *key, bool val);
+
+typedef struct {
+    char **keys; /* keys[i] pairs with values[i]; both arrays hold len heap-allocated strings */
+    char **values;
+    size_t len;
+} json_map_string_string;
+
+void free_json_map_string_string(json_map_string_string *map);
+
+json_map_string_string *make_json_map_string_string(yajl_val src, struct parser_context *ctx, parser_error *err);
+
+yajl_gen_status gen_json_map_string_string(void *ctx, json_map_string_string *map, struct parser_context *ptx, parser_error *err);
+
+int append_json_map_string_string(json_map_string_string *map, const char *key, const char *val);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
\ No newline at end of file
diff --git a/src/lxc/json/logger_json_file.c b/src/lxc/json/logger_json_file.c
new file mode 100644
index 000000000..6abeef458
--- /dev/null
+++ b/src/lxc/json/logger_json_file.c
@@ -0,0 +1,246 @@
+// Generated from json-file.json. Do not edit!
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include
+#include
+#include "logger_json_file.h"
+
+logger_json_file *make_logger_json_file(yajl_val tree, struct parser_context *ctx, parser_error *err) { /* Build a logger_json_file from the string members "log", "stream", "time" and "attrs" of tree; returns NULL for a NULL tree. Caller frees with free_logger_json_file(). */
+    logger_json_file *ret = NULL;
+    *err = 0;
+    if (tree == NULL)
+        return ret;
+    ret = safe_malloc(sizeof(*ret));
+    {
+        yajl_val tmp = get_val(tree, "log", yajl_t_string);
+        if (tmp != NULL) {
+            char *str = YAJL_GET_STRING(tmp);
+            ret->log = (uint8_t *)safe_strdup(str ? str : "");
+            ret->log_len = str != NULL ? strlen(str) : 0;
+        }
+    }
+    {
+        yajl_val val = get_val(tree, "stream", yajl_t_string);
+        if (val != NULL) {
+            char *str = YAJL_GET_STRING(val);
+            ret->stream = safe_strdup(str ? str : "");
+        }
+    }
+    {
+        yajl_val val = get_val(tree, "time", yajl_t_string);
+        if (val != NULL) {
+            char *str = YAJL_GET_STRING(val);
+            ret->time = safe_strdup(str ? str : "");
+        }
+    }
+    {
+        yajl_val tmp = get_val(tree, "attrs", yajl_t_string);
+        if (tmp != NULL) {
+            char *str = YAJL_GET_STRING(tmp);
+            ret->attrs = (uint8_t *)safe_strdup(str ? str : "");
+            ret->attrs_len = str != NULL ? strlen(str) : 0;
+        }
+    }
+
+    if (tree->type == yajl_t_object && (ctx->options & PARSE_OPTIONS_STRICT)) { /* strict mode only warns about unknown keys, it does not fail */
+        size_t i; /* same type as tree->u.object.len, avoids a signed/unsigned comparison */
+        for (i = 0; i < tree->u.object.len; i++)
+            if (strcmp(tree->u.object.keys[i], "log") &&
+                strcmp(tree->u.object.keys[i], "stream") &&
+                strcmp(tree->u.object.keys[i], "time") &&
+                strcmp(tree->u.object.keys[i], "attrs")) {
+                if (ctx->stderr != NULL) /* a pointer is tested against NULL, not ordered against 0 */
+                    fprintf(ctx->stderr, "WARNING: unknown key found: %s\n", tree->u.object.keys[i]);
+            }
+    }
+    return ret;
+}
+
+void free_logger_json_file(logger_json_file *ptr) { /* Free all owned strings and the struct itself; NULL is a no-op. */
+    if (ptr == NULL)
+        return;
+    free(ptr->log);
+    ptr->log = NULL;
+    free(ptr->stream);
+    ptr->stream = NULL;
+    free(ptr->time);
+    ptr->time = NULL;
+    free(ptr->attrs);
+    ptr->attrs = NULL;
+    free(ptr);
+}
+
+yajl_gen_status gen_logger_json_file(yajl_gen g, logger_json_file *ptr, struct parser_context *ctx, parser_error *err) { /* Write ptr as one JSON map; unset members are skipped unless GEN_OPTIONS_ALLKEYVALUE is set, in which case they are written as "". */
+    yajl_gen_status stat = yajl_gen_status_ok;
+    *err = 0;
+    stat = reformat_start_map(g);
+    if (yajl_gen_status_ok != stat)
+        GEN_SET_ERROR_AND_RETURN(stat, err);
+    if ((ctx->options & GEN_OPTIONS_ALLKEYVALUE) || (ptr != NULL && ptr->log != NULL && ptr->log_len)) {
+        const char *str = "";
+        size_t len = 0;
+        stat = reformat_map_key(g, "log", strlen("log"));
+        if (yajl_gen_status_ok != stat)
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+        if (ptr != NULL && ptr->log != NULL) {
+            str = (const char *)ptr->log;
+            len = ptr->log_len; /* explicit length: log is a byte buffer, not measured with strlen() */
+        }
+        stat = reformat_string(g, str, len);
+        if (yajl_gen_status_ok != stat)
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+    }
+    if ((ctx->options & GEN_OPTIONS_ALLKEYVALUE) ||(ptr != NULL && ptr->stream != NULL)) {
+        char *str = "";
+        stat = reformat_map_key(g, "stream", strlen("stream"));
+        if (yajl_gen_status_ok != stat)
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+        if (ptr != NULL && ptr->stream != NULL) {
+            str = ptr->stream;
+        }
+        stat = reformat_string(g, str, strlen(str));
+        if (yajl_gen_status_ok != stat)
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+    }
+    if ((ctx->options & GEN_OPTIONS_ALLKEYVALUE) ||(ptr != NULL && ptr->time != NULL)) {
+        char *str = "";
+        stat = reformat_map_key(g, "time", strlen("time"));
+        if (yajl_gen_status_ok != stat)
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+        if (ptr != NULL && ptr->time != NULL) {
+            str = ptr->time;
+        }
+        stat = reformat_string(g, str, strlen(str));
+        if (yajl_gen_status_ok != stat)
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+    }
+    if ((ctx->options & GEN_OPTIONS_ALLKEYVALUE) || (ptr != NULL && ptr->attrs != NULL && ptr->attrs_len)) {
+        const char *str = "";
+        size_t len = 0;
+        stat = reformat_map_key(g, "attrs", strlen("attrs"));
+        if (yajl_gen_status_ok != stat)
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+        if (ptr != NULL && ptr->attrs != NULL) {
+            str = (const char *)ptr->attrs;
+            len = ptr->attrs_len;
+        }
+        stat = reformat_string(g, str, len);
+        if (yajl_gen_status_ok != stat)
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+    }
+    stat = reformat_end_map(g);
+    if (yajl_gen_status_ok != stat)
+        GEN_SET_ERROR_AND_RETURN(stat, err);
+    return yajl_gen_status_ok;
+}
+
+
+logger_json_file *logger_json_file_parse_file(const char *filename, struct parser_context *ctx, parser_error *err) { /* Read filename with read_file() and parse it; returns NULL with *err set when the file cannot be read or parsed. */
+    logger_json_file *ptr = NULL;
+    size_t filesize;
+    char *content = NULL;
+
+    if (filename == NULL || err == NULL)
+        return NULL;
+
+    *err = NULL;
+    content = read_file(filename, &filesize);
+    if (content == NULL) {
+        if (asprintf(err, "cannot read the file: %s", filename) < 0)
+            *err = safe_strdup("error allocating memory");
+        return NULL;
+    }
+    ptr = logger_json_file_parse_data(content, ctx, err);
+    free(content);
+    return ptr;
+}
+
+logger_json_file *logger_json_file_parse_file_stream(FILE *stream, struct parser_context *ctx, parser_error *err) { /* Same as logger_json_file_parse_file() but reads from an already open stream via fread_file(). */
+    logger_json_file *ptr = NULL;
+    size_t filesize;
+    char *content = NULL;
+
+    if (stream == NULL || err == NULL)
+        return NULL;
+
+    *err = NULL;
+    content = fread_file(stream, &filesize);
+    if (content == NULL) {
+        *err = safe_strdup("cannot read the file");
+        return NULL;
+    }
+    ptr = logger_json_file_parse_data(content, ctx, err);
+    free(content);
+    return ptr;
+}
+
+logger_json_file *logger_json_file_parse_data(const char *jsondata, struct parser_context *ctx, parser_error *err) { /* Parse a NUL-terminated JSON text; a NULL ctx is replaced by a zeroed default context. */
+    logger_json_file *ptr = NULL;
+    yajl_val tree;
+    char errbuf[1024];
+    struct parser_context tmp_ctx;
+
+    if (jsondata == NULL || err == NULL)
+        return NULL;
+
+    *err = NULL;
+    if (ctx == NULL) {
+        ctx = &tmp_ctx;
+        memset(&tmp_ctx, 0, sizeof(tmp_ctx));
+    }
+    tree = yajl_tree_parse(jsondata, errbuf, sizeof(errbuf));
+    if (tree == NULL) {
+        if (asprintf(err, "cannot parse the data: %s", errbuf) < 0)
+            *err = safe_strdup("error allocating memory");
+        return NULL;
+    }
+    ptr = make_logger_json_file(tree, ctx, err);
+    yajl_tree_free(tree);
+    return ptr;
+}
+char *logger_json_file_generate_json(logger_json_file *ptr, struct parser_context *ctx, parser_error *err) { /* Serialize ptr to a newly allocated NUL-terminated JSON string; returns NULL with *err set on failure. Caller frees the result. */
+    yajl_gen g = NULL;
+    struct parser_context tmp_ctx;
+    const unsigned char *gen_buf = NULL;
+    char *json_buf = NULL;
+    size_t gen_len = 0;
+
+    if (ptr == NULL || err == NULL)
+        return NULL;
+
+    *err = NULL;
+    if (ctx == NULL) {
+        ctx = &tmp_ctx;
+        memset(&tmp_ctx, 0, sizeof(tmp_ctx));
+    }
+
+    if (!json_gen_init(&g, ctx)) {
+        *err = safe_strdup("Json_gen init failed");
+        goto out;
+    }
+    if (yajl_gen_status_ok != gen_logger_json_file(g, ptr, ctx, err)) {
+        if (*err == NULL)
+            *err = safe_strdup("Failed to generate json");
+        goto free_out;
+    }
+    yajl_gen_get_buf(g, &gen_buf, &gen_len);
+    if (gen_buf == NULL) {
+        *err = safe_strdup("Error to get generated json");
+        goto free_out;
+    }
+
+    if (gen_len == SIZE_MAX) { /* gen_len + 1 below must not wrap to 0 */
+        *err = safe_strdup("Invalid buffer length");
+        goto free_out;
+    }
+    json_buf = safe_malloc(gen_len + 1);
+    (void)memcpy(json_buf, gen_buf, gen_len); /* copy out before the generator, which owns gen_buf, is freed */
+    json_buf[gen_len] = '\0';
+
+free_out:
+    yajl_gen_clear(g);
+    yajl_gen_free(g);
+out:
+    return json_buf;
+}
diff --git a/src/lxc/json/logger_json_file.h b/src/lxc/json/logger_json_file.h
new file mode 100644
index 000000000..ad5af7b49
--- /dev/null
+++ b/src/lxc/json/logger_json_file.h
@@ -0,0
+1,45 @@
+// Generated from json-file.json. Do not edit!
+#ifndef LOGGER_JSON_FILE_SCHEMA_H
+#define LOGGER_JSON_FILE_SCHEMA_H
+
+#include
+#include
+#include "json_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+    uint8_t *log; /* byte buffer; its length is kept in log_len */
+    size_t log_len;
+
+    char *stream;
+
+    char *time;
+
+    uint8_t *attrs; /* byte buffer; its length is kept in attrs_len */
+    size_t attrs_len;
+
+}
+logger_json_file;
+
+void free_logger_json_file(logger_json_file *ptr);
+
+logger_json_file *make_logger_json_file(yajl_val tree, struct parser_context *ctx, parser_error *err);
+
+yajl_gen_status gen_logger_json_file(yajl_gen g, logger_json_file *ptr, struct parser_context *ctx, parser_error *err);
+
+logger_json_file *logger_json_file_parse_file(const char *filename, struct parser_context *ctx, parser_error *err);
+
+logger_json_file *logger_json_file_parse_file_stream(FILE *stream, struct parser_context *ctx, parser_error *err);
+
+logger_json_file *logger_json_file_parse_data(const char *jsondata, struct parser_context *ctx, parser_error *err);
+
+char *logger_json_file_generate_json(logger_json_file *ptr, struct parser_context *ctx, parser_error *err);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/lxc/json/oci_runtime_hooks.c b/src/lxc/json/oci_runtime_hooks.c
new file mode 100644
index 000000000..41ddb672d
--- /dev/null
+++ b/src/lxc/json/oci_runtime_hooks.c
@@ -0,0 +1,52 @@
+/******************************************************************************
+ * Copyright (C), 1988-1999, Huawei Tech. Co., Ltd.
+ * FileName: oci_runtime_hooks.c
+ * Author: maoweiyong Version: 0.1 Date: 2018-11-07
+ * Explanation: provide oci runtime hooks functions
+ ******************************************************************************/
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include
+#include "oci_runtime_hooks.h"
+
+#include "log.h"
+#include "utils.h"
+
+#define PARSE_ERR_BUFFER_SIZE 1024
+
+oci_runtime_spec_hooks *oci_runtime_spec_hooks_parse_file(const char *filename, /* Read and parse a hooks JSON file. Returns the parsed hooks, or NULL with *err set to an allocated message. */
+        struct parser_context *ctx, parser_error *err)
+{
+    yajl_val tree;
+    size_t filesize; /* filled in by read_file(); not used otherwise in this function */
+
+    if (!filename || !err) {
+        return NULL;
+    }
+    *err = NULL;
+    struct parser_context tmp_ctx;
+    if (!ctx) { /* a NULL ctx is replaced by a zeroed default context */
+        ctx = &tmp_ctx;
+        memset(&tmp_ctx, 0, sizeof(tmp_ctx));
+    }
+    char *content = read_file(filename, &filesize);
+    char errbuf[PARSE_ERR_BUFFER_SIZE];
+    if (content == NULL) {
+        if (asprintf(err, "cannot read the file: %s", filename) < 0) {
+            *err = safe_strdup("error allocating memory");
+        }
+        return NULL;
+    }
+    tree = yajl_tree_parse(content, errbuf, sizeof(errbuf));
+    free(content); /* the file text is released as soon as the tree has been built from it */
+    if (tree == NULL) {
+        if (asprintf(err, "cannot parse the file: %s", errbuf) < 0) {
+            *err = safe_strdup("error allocating memory");
+        }
+        return NULL;
+    }
+    oci_runtime_spec_hooks *ptr = make_oci_runtime_spec_hooks(tree, ctx, err);
+    yajl_tree_free(tree);
+    return ptr;
+}
diff --git a/src/lxc/json/oci_runtime_hooks.h b/src/lxc/json/oci_runtime_hooks.h
new file mode 100644
index 000000000..bf570c9e0
--- /dev/null
+++ b/src/lxc/json/oci_runtime_hooks.h
@@ -0,0 +1,15 @@
+/******************************************************************************
+ * Copyright (C), 1988-1999, Huawei Tech. Co., Ltd.
+ * FileName: oci_runtime_hooks.h
+ * Author: tanyifeng Version: 0.1 Date: 2018-11-08
+ * Explanation: provide container oci runtime hooks function definition
+ ******************************************************************************/
+#ifndef _CONTAINER_HOOKS_H
+# define _CONTAINER_HOOKS_H
+
+# include "oci_runtime_spec.h"
+
+oci_runtime_spec_hooks *oci_runtime_spec_hooks_parse_file(const char *filename,
+        struct parser_context *ctx, parser_error *err);
+
+#endif
diff --git a/src/lxc/json/oci_runtime_spec.c b/src/lxc/json/oci_runtime_spec.c
new file mode 100644
index 000000000..fd342deb9
--- /dev/null
+++ b/src/lxc/json/oci_runtime_spec.c
@@ -0,0 +1,195 @@
+// Generated from spec.json. Do not edit!
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include
+#include
+#include "oci_runtime_spec.h"
+
+oci_runtime_spec_hooks *make_oci_runtime_spec_hooks(yajl_val tree, struct parser_context *ctx, parser_error *err) { /* Build the hooks struct from the "prestart", "poststart" and "poststop" arrays of tree; returns NULL for a NULL tree or when any hook fails to parse. */
+    oci_runtime_spec_hooks *ret = NULL;
+    *err = 0;
+    if (tree == NULL)
+        return ret;
+    ret = safe_malloc(sizeof(*ret));
+    {
+        yajl_val tmp = get_val(tree, "prestart", yajl_t_array);
+        if (tmp != NULL && YAJL_GET_ARRAY(tmp) != NULL && YAJL_GET_ARRAY(tmp)->len > 0) {
+            size_t i;
+            ret->prestart_len = YAJL_GET_ARRAY(tmp)->len;
+            ret->prestart = safe_malloc((YAJL_GET_ARRAY(tmp)->len + 1) * sizeof(*ret->prestart));
+            for (i = 0; i < YAJL_GET_ARRAY(tmp)->len; i++) {
+                yajl_val val = YAJL_GET_ARRAY(tmp)->values[i];
+                ret->prestart[i] = make_defs_hook(val, ctx, err);
+                if (ret->prestart[i] == NULL) {
+                    free_oci_runtime_spec_hooks(ret); /* NOTE(review): walks all prestart_len slots; presumably safe_malloc() zero-fills so unfilled slots are NULL - confirm */
+                    return NULL;
+                }
+            }
+        }
+    }
+    {
+        yajl_val tmp = get_val(tree, "poststart", yajl_t_array);
+        if (tmp != NULL && YAJL_GET_ARRAY(tmp) != NULL && YAJL_GET_ARRAY(tmp)->len > 0) {
+            size_t i;
+            ret->poststart_len = YAJL_GET_ARRAY(tmp)->len;
+            ret->poststart = safe_malloc((YAJL_GET_ARRAY(tmp)->len + 1) * sizeof(*ret->poststart));
+            for (i = 0; i < YAJL_GET_ARRAY(tmp)->len; i++) {
+                yajl_val val = YAJL_GET_ARRAY(tmp)->values[i];
+                ret->poststart[i] = make_defs_hook(val, ctx, err);
+                if (ret->poststart[i] == NULL) {
+                    free_oci_runtime_spec_hooks(ret);
+                    return NULL;
+                }
+            }
+        }
+    }
+    {
+        yajl_val tmp = get_val(tree, "poststop", yajl_t_array);
+        if (tmp != NULL && YAJL_GET_ARRAY(tmp) != NULL && YAJL_GET_ARRAY(tmp)->len > 0) {
+            size_t i;
+            ret->poststop_len = YAJL_GET_ARRAY(tmp)->len;
+            ret->poststop = safe_malloc((YAJL_GET_ARRAY(tmp)->len + 1) * sizeof(*ret->poststop));
+            for (i = 0; i < YAJL_GET_ARRAY(tmp)->len; i++) {
+                yajl_val val = YAJL_GET_ARRAY(tmp)->values[i];
+                ret->poststop[i] = make_defs_hook(val, ctx, err);
+                if (ret->poststop[i] == NULL) {
+                    free_oci_runtime_spec_hooks(ret);
+                    return NULL;
+                }
+            }
+        }
+    }
+
+    if (tree->type == yajl_t_object && (ctx->options & PARSE_OPTIONS_STRICT)) { /* strict mode only warns about unknown keys, it does not fail */
+        size_t i; /* same type as tree->u.object.len, avoids a signed/unsigned comparison */
+        for (i = 0; i < tree->u.object.len; i++)
+            if (strcmp(tree->u.object.keys[i], "prestart") &&
+                strcmp(tree->u.object.keys[i], "poststart") &&
+                strcmp(tree->u.object.keys[i], "poststop")) {
+                if (ctx->stderr != NULL) /* a pointer is tested against NULL, not ordered against 0 */
+                    fprintf(ctx->stderr, "WARNING: unknown key found: %s\n", tree->u.object.keys[i]);
+            }
+    }
+    return ret;
+}
+
+void free_oci_runtime_spec_hooks(oci_runtime_spec_hooks *ptr) { /* Free every non-NULL hook in the three arrays, the arrays and the struct; NULL is a no-op. */
+    if (ptr == NULL)
+        return;
+    if (ptr->prestart != NULL) {
+        size_t i;
+        for (i = 0; i < ptr->prestart_len; i++)
+            if (ptr->prestart[i] != NULL) {
+                free_defs_hook(ptr->prestart[i]);
+                ptr->prestart[i] = NULL;
+            }
+        free(ptr->prestart);
+        ptr->prestart = NULL;
+    }
+    if (ptr->poststart != NULL) {
+        size_t i;
+        for (i = 0; i < ptr->poststart_len; i++)
+            if (ptr->poststart[i] != NULL) {
+                free_defs_hook(ptr->poststart[i]);
+                ptr->poststart[i] = NULL;
+            }
+        free(ptr->poststart);
+        ptr->poststart = NULL;
+    }
+    if (ptr->poststop != NULL) {
+        size_t i;
+        for (i = 0; i < ptr->poststop_len; i++)
+            if (ptr->poststop[i] != NULL) {
+                free_defs_hook(ptr->poststop[i]);
+                ptr->poststop[i] = NULL;
+            }
+        free(ptr->poststop);
+        ptr->poststop = NULL;
+    }
+    free(ptr);
+}
+
+yajl_gen_status gen_oci_runtime_spec_hooks(yajl_gen g, oci_runtime_spec_hooks *ptr, struct parser_context *ctx, parser_error *err) { /* Write ptr as one JSON map with an array per hook list; unset lists are skipped unless GEN_OPTIONS_ALLKEYVALUE is set, in which case they are written as empty arrays. */
+    yajl_gen_status stat = yajl_gen_status_ok;
+    *err = 0;
+    stat = reformat_start_map(g);
+    if (yajl_gen_status_ok != stat)
+        GEN_SET_ERROR_AND_RETURN(stat, err);
+    if ((ctx->options & GEN_OPTIONS_ALLKEYVALUE) ||(ptr != NULL && ptr->prestart != NULL)) {
+        size_t len = 0, i;
+        stat = reformat_map_key(g, "prestart", strlen("prestart"));
+        if (yajl_gen_status_ok != stat)
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+        if (ptr != NULL && ptr->prestart != NULL) {
+            len = ptr->prestart_len;
+        }
+        if (!len && !(ctx->options & GEN_OPTIONS_SIMPLIFY))
+            yajl_gen_config(g, yajl_gen_beautify, 0); /* beautify is switched off only while an empty array is written */
+        stat = reformat_start_array(g);
+        if (yajl_gen_status_ok != stat)
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+        for (i = 0; i < len; i++) {
+            stat = gen_defs_hook(g, ptr->prestart[i], ctx, err);
+            if (yajl_gen_status_ok != stat)
+                GEN_SET_ERROR_AND_RETURN(stat, err);
+        }
+        stat = reformat_end_array(g);
+        if (!len && !(ctx->options & GEN_OPTIONS_SIMPLIFY))
+            yajl_gen_config(g, yajl_gen_beautify, 1);
+        if (yajl_gen_status_ok != stat)
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+    }
+    if ((ctx->options & GEN_OPTIONS_ALLKEYVALUE) ||(ptr != NULL && ptr->poststart != NULL)) {
+        size_t len = 0, i;
+        stat = reformat_map_key(g, "poststart", strlen("poststart"));
+        if (yajl_gen_status_ok != stat)
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+        if (ptr != NULL && ptr->poststart != NULL) {
+            len = ptr->poststart_len;
+        }
+        if (!len && !(ctx->options & GEN_OPTIONS_SIMPLIFY))
+            yajl_gen_config(g, yajl_gen_beautify, 0);
+        stat = reformat_start_array(g);
+        if (yajl_gen_status_ok != stat)
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+        for (i = 0; i < len; i++) {
+            stat = gen_defs_hook(g, ptr->poststart[i], ctx, err);
+            if (yajl_gen_status_ok != stat)
+                GEN_SET_ERROR_AND_RETURN(stat, err);
+        }
+        stat = reformat_end_array(g);
+        if (!len && !(ctx->options & GEN_OPTIONS_SIMPLIFY))
+            yajl_gen_config(g, yajl_gen_beautify, 1);
+        if (yajl_gen_status_ok != stat)
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+    }
+    if ((ctx->options & GEN_OPTIONS_ALLKEYVALUE) ||(ptr != NULL && ptr->poststop != NULL)) {
+        size_t len = 0, i;
+        stat = reformat_map_key(g, "poststop", strlen("poststop"));
+        if (yajl_gen_status_ok != stat)
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+        if (ptr != NULL && ptr->poststop != NULL) {
+            len = ptr->poststop_len;
+        }
+        if (!len && !(ctx->options & GEN_OPTIONS_SIMPLIFY))
+            yajl_gen_config(g, yajl_gen_beautify, 0);
+        stat = reformat_start_array(g);
+        if (yajl_gen_status_ok != stat)
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+        for (i = 0; i < len; i++) {
+            stat = gen_defs_hook(g, ptr->poststop[i], ctx, err);
+            if (yajl_gen_status_ok != stat)
+                GEN_SET_ERROR_AND_RETURN(stat, err);
+        }
+        stat = reformat_end_array(g);
+        if (!len && !(ctx->options & GEN_OPTIONS_SIMPLIFY))
+            yajl_gen_config(g, yajl_gen_beautify, 1);
+        if (yajl_gen_status_ok != stat)
+            GEN_SET_ERROR_AND_RETURN(stat, err);
+    }
+    stat = reformat_end_map(g);
+    if (yajl_gen_status_ok != stat)
+        GEN_SET_ERROR_AND_RETURN(stat, err);
+    return yajl_gen_status_ok;
+}
diff --git a/src/lxc/json/oci_runtime_spec.h b/src/lxc/json/oci_runtime_spec.h
new file mode 100644
index 000000000..ef3f1619a
--- /dev/null
+++ b/src/lxc/json/oci_runtime_spec.h
@@ -0,0 +1,37 @@
+// Generated from spec.json. Do not edit!
+#ifndef OCI_RUNTIME_SPEC_SCHEMA_H
+#define OCI_RUNTIME_SPEC_SCHEMA_H
+
+#include
+#include
+#include "json_common.h"
+#include "defs.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+    defs_hook **prestart; /* array of prestart_len hook pointers, or NULL */
+    size_t prestart_len;
+
+    defs_hook **poststart; /* array of poststart_len hook pointers, or NULL */
+    size_t poststart_len;
+
+    defs_hook **poststop; /* array of poststop_len hook pointers, or NULL */
+    size_t poststop_len;
+
+}
+oci_runtime_spec_hooks;
+
+void free_oci_runtime_spec_hooks(oci_runtime_spec_hooks *ptr);
+
+oci_runtime_spec_hooks *make_oci_runtime_spec_hooks(yajl_val tree, struct parser_context *ctx, parser_error *err);
+
+yajl_gen_status gen_oci_runtime_spec_hooks(yajl_gen g, oci_runtime_spec_hooks *ptr, struct parser_context *ctx, parser_error *err);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/lxc/json/read-file.c b/src/lxc/json/read-file.c
new file mode 100644
index 000000000..70e73e51a
--- /dev/null
+++ b/src/lxc/json/read-file.c
@@ -0,0 +1,95 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include "read-file.h"
+
+#ifndef O_CLOEXEC
+#define O_CLOEXEC 02000000
+#endif
+
+char *fread_file(FILE *stream, size_t *length) /* Read stream to EOF into a new NUL-terminated heap buffer; *length is bytes read + 1 (it counts the NUL). Returns NULL on allocation failure or on a read error that returned no data. Caller frees. */
+{
+    char *buf = NULL, *tmpbuf = NULL;
+    size_t off = 0;
+
+    while (1) {
+        size_t ret, newsize;
+
+        newsize = off + BUFSIZ + 1; /* room for one more BUFSIZ chunk plus the terminating NUL */
+        tmpbuf = (char *)calloc(1, newsize);
+        if (tmpbuf == NULL) {
+            goto out;
+        }
+
+        if (buf) {
+            memcpy(tmpbuf, buf, off);
+
+            memset(buf, 0, off); /* old copy is zeroed before it is released */
+
+            free(buf);
+        }
+
+        buf = tmpbuf;
+        ret = fread(buf + off, 1, BUFSIZ, stream);
+        if (!ret && ferror(stream)) { /* NOTE(review): a short read caused by an error (ret > 0) is still returned as success below */
+            tmpbuf = NULL; /* buf and tmpbuf alias here; clear one so "out" frees the block once */
+            goto out;
+        }
+        if (ret < BUFSIZ || feof(stream)) {
+            *length = off + ret + 1;
+            buf[*length - 1] = '\0';
+            return buf;
+        }
+        off += BUFSIZ;
+    }
+out:
+    if (buf) {
+        free(buf);
+    }
+    if (tmpbuf) {
+        free(tmpbuf);
+    }
+    return NULL;
+
+}
+
+char *read_file(const char *path, size_t *length) /* Resolve path with realpath(), open it read-only and return its content via fread_file(); NULL on any failure. Caller frees. */
+{
+    char *buf = NULL;
+    char rpath[PATH_MAX + 1] = {0};
+    int fd = -1;
+    int tmperrno;
+    FILE *fp = NULL;
+
+    if (!path || !length) {
+        return NULL;
+    }
+
+    if (strlen(path) > PATH_MAX || NULL == realpath(path, rpath)) {
+        return NULL;
+    }
+
+    fd = open(rpath, O_RDONLY | O_CLOEXEC, 0640);
+    if (fd < 0) {
+        return NULL;
+    }
+
+    fp = fdopen(fd, "r");
+    tmperrno = errno; /* saved so close() below cannot clobber the fdopen() error */
+    if (!fp) {
+        close(fd);
+        errno = tmperrno;
+        return NULL;
+    }
+
+    buf = fread_file(fp, length);
+    fclose(fp); /* also closes fd, which the stream owns after fdopen() */
+    return buf;
+}
diff --git a/src/lxc/json/read-file.h b/src/lxc/json/read-file.h
new file mode 100644
index 000000000..5d6e0eb62
--- /dev/null
+++ b/src/lxc/json/read-file.h
@@ -0,0 +1,11 @@
+#ifndef READ_FILE_H
+#define READ_FILE_H
+
+#include
+#include
+
+extern char *fread_file(FILE *stream, size_t *length);
+
+extern char *read_file(const char *path, size_t *length);
+
+#endif
diff --git a/src/lxc/log.c b/src/lxc/log.c
index 59644aa7a..79caa2cce 100644
--- a/src/lxc/log.c
+++ b/src/lxc/log.c
@@ -44,7 +44,7 @@
 #define LXC_LOG_TIME_SIZE ((INTTYPE_TO_STRLEN(uint64_t)) * 2)
 
 int lxc_log_fd = -EBADF;
-static bool wants_syslog = false;
+static int syslog_enable = 0;
 int lxc_quiet_specified;
 int lxc_log_use_global_fd;
 static int lxc_loglevel_specified;
@@ -55,6 +55,38 @@ static char *log_vmname = NULL;
 
 lxc_log_define(log, lxc);
 
+#ifdef HAVE_ISULAD
+static inline const char *isulad_get_fifo_path(const char *file) /* Return the part of file after the "fifo:" prefix, or NULL when the prefix is absent. */
+{
+#define ISULAD_FIFO_PREFIX "fifo:"
+
+	if (strncmp(file, ISULAD_FIFO_PREFIX, strlen(ISULAD_FIFO_PREFIX)) == 0) {
+		return (file + strlen(ISULAD_FIFO_PREFIX));
+	}
+	return NULL;
+}
+
+static int isulad_open_fifo(const char *file_path) /* Open file_path read/write and non-blocking, then set its pipe buffer to LOG_FIFO_SIZE. Returns the fd, or -1 after reporting the failure on stderr. */
+{
+#define LOG_FIFO_SIZE (1024 * 1024)
+	int fd;
+
+	fd = lxc_unpriv(open(file_path, O_RDWR | O_NONBLOCK | O_CLOEXEC, 0640));
+	if (fd == -1) {
+		fprintf(stderr, "Open fifo %s failed: %s\n", file_path, strerror(errno));
+		return -1;
+	}
+
+	if (fcntl(fd, F_SETPIPE_SZ, LOG_FIFO_SIZE) == -1) {
+		fprintf(stderr, "Set fifo buffer size failed: %s\n", strerror(errno)); /* errors go to stderr with a newline, like the open() failure above */
+		close(fd);
+		return -1;
+	}
+
+	return fd;
+}
+#endif
+
 static int lxc_log_priority_to_syslog(int priority)
 {
 	switch (priority) {
@@ -128,7 +160,7 @@
static int log_append_syslog(const struct lxc_log_appender *appender, __do_free char *msg = NULL; const char *log_container_name; - if (!wants_syslog) + if (!syslog_enable) return 0; log_container_name = lxc_log_get_container_name(); @@ -321,6 +353,12 @@ static int log_append_logfile(const struct lxc_log_appender *appender, #endif log_container_name = lxc_log_get_container_name(); +#ifdef HAVE_ISULAD + /* use isulad log format */ + if (log_container_name != NULL && strlen(log_container_name) > 15) { + log_container_name = log_container_name + (strlen(log_container_name) - 15); + } +#endif if (fd_to_use < 0) fd_to_use = lxc_log_fd; @@ -333,9 +371,13 @@ static int log_append_logfile(const struct lxc_log_appender *appender, return ret; n = snprintf(buffer, sizeof(buffer), +#if HAVE_ISULAD + "%15s %s %-8s %s - %s:%s:%d -", +#else "%s%s%s %s %-8s %s - %s:%s:%d - ", log_prefix, log_container_name ? " " : "", +#endif log_container_name ? log_container_name : "", date_time, lxc_log_priority_to_string(event->priority), @@ -485,9 +527,10 @@ static int build_dir(const char *name) *p = '\0'; ret = lxc_unpriv(mkdir(n, 0755)); - *p = '/'; if (ret && errno != EEXIST) return log_error_errno(-errno, errno, "Failed to create directory \"%s\"", n); + + *p = '/'; } return 0; @@ -589,6 +632,13 @@ static int __lxc_log_set_file(const char *fname, int create_dirs) return ret_errno(EINVAL); } +#ifdef HAVE_ISULAD + fname = isulad_get_fifo_path(fname); + if (fname == NULL) { + return ret_errno(EINVAL); + } +#endif + #if USE_CONFIGPATH_LOGS /* We don't build_dir for the default if the default is i.e. * /var/lib/lxc/$container/$container.log. 
/*
 * Switch on syslog output by setting the file-local syslog_enable flag to 1.
 * log_append_syslog() returns without logging while that flag is zero.
 */
inline void lxc_log_enable_syslog(void)
{
	syslog_enable = 1;
}
\ INFO("%m - " format, ##__VA_ARGS__) #else @@ -426,7 +422,7 @@ __lxc_unused static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \ } while (0) #endif -#if HAVE_M_FORMAT && !ENABLE_COVERITY_BUILD +#if HAVE_M_FORMAT #define SYSNOTICE(format, ...) \ NOTICE("%m - " format, ##__VA_ARGS__) #else @@ -437,7 +433,7 @@ __lxc_unused static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \ } while (0) #endif -#if HAVE_M_FORMAT && !ENABLE_COVERITY_BUILD +#if HAVE_M_FORMAT #define SYSWARN(format, ...) \ WARN("%m - " format, ##__VA_ARGS__) #else @@ -448,7 +444,7 @@ __lxc_unused static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \ } while (0) #endif -#if HAVE_M_FORMAT && !ENABLE_COVERITY_BUILD +#if HAVE_M_FORMAT #define SYSERROR(format, ...) \ ERROR("%m - " format, ##__VA_ARGS__) #else @@ -459,7 +455,7 @@ __lxc_unused static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \ } while (0) #endif -#if HAVE_M_FORMAT && !ENABLE_COVERITY_BUILD +#if HAVE_M_FORMAT #define CMD_SYSERROR(format, ...) \ fprintf(stderr, "%s: %d: %s - %m - " format "\n", __FILE__, __LINE__, \ __func__, ##__VA_ARGS__); @@ -472,7 +468,7 @@ __lxc_unused static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \ } while (0) #endif -#if HAVE_M_FORMAT && !ENABLE_COVERITY_BUILD +#if HAVE_M_FORMAT #define CMD_SYSINFO(format, ...) 
\ printf("%s: %d: %s - %m - " format "\n", __FILE__, __LINE__, __func__, \ ##__VA_ARGS__); @@ -563,8 +559,7 @@ __lxc_unused static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \ extern int lxc_log_fd; extern int lxc_log_syslog(int facility); -extern void lxc_log_syslog_enable(void); -extern void lxc_log_syslog_disable(void); +extern void lxc_log_enable_syslog(void); extern int lxc_log_set_level(int *dest, int level); extern int lxc_log_get_level(void); extern bool lxc_log_has_valid_level(void); diff --git a/src/lxc/lsm/apparmor.c b/src/lxc/lsm/apparmor.c index 02f824f97..f251e5e7e 100644 --- a/src/lxc/lsm/apparmor.c +++ b/src/lxc/lsm/apparmor.c @@ -19,7 +19,7 @@ #include "log.h" #include "lsm.h" #include "parse.h" -#include "process_utils.h" +#include "raw_syscalls.h" #include "utils.h" lxc_log_define(apparmor, lsm); @@ -121,8 +121,8 @@ static const char AA_PROFILE_BASE[] = " # deny reads from debugfs\n" " deny /sys/kernel/debug/{,**} rwklx,\n" "\n" -" # allow paths to be made dependent, shared, private or unbindable\n" -" # TODO: This currently doesn't work due to the apparmor parser treating those as allowing all mounts.\n" +" # allow paths to be made slave, shared, private or unbindable\n" +" # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts.\n" "# mount options=(rw,make-slave) -> **,\n" "# mount options=(rw,make-rslave) -> **,\n" "# mount options=(rw,make-shared) -> **,\n" @@ -343,7 +343,7 @@ static const char AA_PROFILE_NESTING_BASE[] = " mount /var/lib/lxd/shmounts/ -> /var/lib/lxd/shmounts/,\n" " mount options=bind /var/lib/lxd/shmounts/** -> /var/lib/lxd/**,\n" "\n" -" # TODO: There doesn't seem to be a way to ask for:\n" +" # FIXME: There doesn't seem to be a way to ask for:\n" " # mount options=(ro,nosuid,nodev,noexec,remount,bind),\n" " # as we always get mount to $cdir/proc/sys with those flags denied\n" " # So allow all mounts until that is straightened out:\n" @@ -538,7 +538,7 @@ static 
inline char *apparmor_namespace(const char *ctname, const char *lxcpath) return full; } -/* TODO: This is currently run only in the context of a constructor (via the +/* FIXME: This is currently run only in the context of a constructor (via the * initial lsm_init() called due to its __attribute__((constructor)), so we * do not have ERROR/... macros available, so there are some fprintf(stderr)s * in there. @@ -560,7 +560,7 @@ static bool check_apparmor_parser_version() lxc_pclose(parserpipe); /* We stay silent for now as this most likely means the shell * lxc_popen executed failed to find the apparmor_parser binary. - * See the TODO comment above for details. + * See the FIXME comment above for details. */ return false; } diff --git a/src/lxc/lxc.h b/src/lxc/lxc.h index 630eff0b4..ec2feaa5b 100644 --- a/src/lxc/lxc.h +++ b/src/lxc/lxc.h @@ -32,9 +32,14 @@ struct lxc_handler; * @daemonize : whether or not the container is daemonized * Returns 0 on success, < 0 otherwise */ +#ifdef HAVE_ISULAD +extern int lxc_start(char *const argv[], struct lxc_handler *handler, + const char *lxcpath, bool daemonize, int *error_num, + unsigned int start_timeout); +#else extern int lxc_start(char *const argv[], struct lxc_handler *handler, const char *lxcpath, bool daemonize, int *error_num); - +#endif /* * Start the specified command inside an application container * @name : the name of the container @@ -44,9 +49,15 @@ extern int lxc_start(char *const argv[], struct lxc_handler *handler, * @daemonize : whether or not the container is daemonized * Returns 0 on success, < 0 otherwise */ +#ifdef HAVE_ISULAD +extern int lxc_execute(const char *name, char *const argv[], int quiet, + struct lxc_handler *handler, const char *lxcpath, + bool daemonize, int *error_num, unsigned int start_timeout); +#else extern int lxc_execute(const char *name, char *const argv[], int quiet, struct lxc_handler *handler, const char *lxcpath, bool daemonize, int *error_num); +#endif /* * Close the fd associated 
with the monitoring @@ -83,6 +94,13 @@ extern lxc_state_t lxc_state(const char *name, const char *lxcpath); */ extern struct lxc_container *lxc_container_new(const char *name, const char *configpath); +#ifdef HAVE_ISULAD +/* + * Create a new container without loading config. + */ +extern struct lxc_container *lxc_container_without_config_new(const char *name, const char *configpath); +#endif + /* * Returns 1 on success, 0 on failure. */ diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c index aac621482..eef98df67 100644 --- a/src/lxc/lxccontainer.c +++ b/src/lxc/lxccontainer.c @@ -49,7 +49,7 @@ #include "namespace.h" #include "network.h" #include "parse.h" -#include "process_utils.h" +#include "raw_syscalls.h" #include "start.h" #include "state.h" #include "storage.h" @@ -62,6 +62,10 @@ #include "utils.h" #include "version.h" +#ifdef HAVE_ISULAD +#include "exec_commands.h" +#endif + #if HAVE_OPENSSL #include #endif @@ -281,6 +285,13 @@ static void lxc_container_free(struct lxc_container *c) free(c->config_path); c->config_path = NULL; +#ifdef HAVE_ISULAD + free(c->exit_fifo); + c->exit_fifo = NULL; + free(c->ocihookfile); + c->ocihookfile = NULL; +#endif + free(c); } @@ -505,6 +516,20 @@ static bool do_lxcapi_is_running(struct lxc_container *c) WRAP_API(bool, lxcapi_is_running) +#ifdef HAVE_ISULAD +static bool do_lxcapi_freeze(struct lxc_container *c) +{ + if (!c || !c->lxc_conf) { + return false; + } + + if (lxc_freeze(c->lxc_conf, c->name, c->config_path) < 0) { + return false; + } + + return true; +} +#else static bool do_lxcapi_freeze(struct lxc_container *c) { lxc_state_t s; @@ -518,9 +543,25 @@ static bool do_lxcapi_freeze(struct lxc_container *c) return true; } +#endif + WRAP_API(bool, lxcapi_freeze) +#ifdef HAVE_ISULAD +static bool do_lxcapi_unfreeze(struct lxc_container *c) +{ + if (!c || !c->lxc_conf) { + return false; + } + + if (lxc_unfreeze(c->lxc_conf, c->name, c->config_path) < 0) { + return false; + } + + return true; +} +#else static bool 
do_lxcapi_unfreeze(struct lxc_container *c) { lxc_state_t s; @@ -534,15 +575,16 @@ static bool do_lxcapi_unfreeze(struct lxc_container *c) return true; } +#endif WRAP_API(bool, lxcapi_unfreeze) -static int do_lxcapi_console_getfd(struct lxc_container *c, int *ttynum, int *ptmxfd) +static int do_lxcapi_console_getfd(struct lxc_container *c, int *ttynum, int *masterfd) { if (!c) return -1; - return lxc_terminal_getfd(c, ttynum, ptmxfd); + return lxc_terminal_getfd(c, ttynum, masterfd); } WRAP_API_2(int, lxcapi_console_getfd, int *, int *) @@ -623,6 +665,66 @@ static bool load_config_locked(struct lxc_container *c, const char *fname) return true; } +#ifdef HAVE_ISULAD +static bool load_ocihooks_locked(struct lxc_container *c) +{ + parser_error err = NULL; + oci_runtime_spec_hooks *hooks = NULL; + + if (!c->lxc_conf) + c->lxc_conf = lxc_conf_init(); + + if (!c->lxc_conf) + return false; + + hooks = oci_runtime_spec_hooks_parse_file(c->ocihookfile, NULL, &err); + if (!hooks) { + fprintf(stderr, "parse oci hooks config failed: %s\n", err); + free(err); + return true; + } + c->lxc_conf->ocihooks = hooks; + + if (err) + free(err); + return true; +} + +/* + * isulad: set oci hook file path + * */ +static bool set_oci_hook_config_filename(struct lxc_container *c) +{ +#define OCI_HOOK_JSON_FILE_NAME "ocihooks.json" + char *newpath = NULL; + int len, ret; + + if (!c->config_path) + return false; + + /* $lxc_path + "/" + c->name + "/" + "config" + '\0' */ + if (strlen(c->config_path) + strlen(c->name) > SIZE_MAX - strlen(OCI_HOOK_JSON_FILE_NAME) - 3) + return false; + len = strlen(c->config_path) + strlen(c->name) + strlen(OCI_HOOK_JSON_FILE_NAME) + 3; + + newpath = malloc(len); + if (newpath == NULL) + return false; + + ret = snprintf(newpath, len, "%s/%s/%s", c->config_path, c->name, OCI_HOOK_JSON_FILE_NAME); + if (ret < 0 || ret >= len) { + fprintf(stderr, "Error printing out config file name\n"); + free(newpath); + return false; + } + + free(c->ocihookfile); + 
/*
 * isulad: build the init command vector from the configured init argv.
 *
 * Starts from a one-slot, NULL-terminated vector and appends each of the
 * first @init_args entries of @init_argv through push_arg().
 *
 * Returns the new vector, or NULL when @init_argv is NULL or no argument
 * ended up in the vector.
 */
static char **use_init_args(char **init_argv, size_t init_args)
{
	char **args = NULL;
	int count = 0;
	size_t idx = 0;

	if (init_argv == NULL)
		return NULL;

	/* Keep retrying until the initial one-slot vector is allocated. */
	while (args == NULL)
		args = malloc(sizeof(*args));
	args[0] = NULL;

	while (idx < init_args) {
		push_arg(&args, init_argv[idx], &count);
		idx++;
	}

	if (count != 0)
		return args;

	free(args);
	return NULL;
}
+#endif /* container does exist */ if (!c) @@ -898,7 +1041,7 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a conf = c->lxc_conf; /* initialize handler */ - handler = lxc_init_handler(NULL, c->name, conf, c->config_path, c->daemonize); + handler = lxc_init_handler(c->name, conf, c->config_path, c->daemonize); container_mem_unlock(c); if (!handler) @@ -911,11 +1054,17 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a argv = init_cmd = split_init_cmd(conf->init_cmd); } +#ifdef HAVE_ISULAD + if (!argv) { + argv = init_cmd = use_init_args(conf->init_argv, conf->init_argc); + } +#endif + /* ... otherwise use default_args. */ if (!argv) { if (useinit) { ERROR("No valid init detected"); - lxc_put_handler(handler); + lxc_free_handler(handler); return false; } argv = default_args; @@ -930,10 +1079,23 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a char title[2048]; pid_t pid_first, pid_second; +#ifdef HAVE_ISULAD + //isulad: pipdfd for get error message of child or grandchild process. + if (pipe2(conf->errpipe, O_CLOEXEC) != 0) { + SYSERROR("Failed to init errpipe"); + free_init_cmd(init_cmd); + lxc_free_handler(handler); + return false; + } +#endif + pid_first = fork(); if (pid_first < 0) { free_init_cmd(init_cmd); - lxc_put_handler(handler); + lxc_free_handler(handler); +#ifdef HAVE_ISULAD + lxc_close_error_pipe(conf->errpipe); +#endif return false; } @@ -943,14 +1105,28 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a * the PID file, child will do the free and unlink. */ c->pidfile = NULL; +#ifdef HAVE_ISULAD + close(conf->errpipe[1]); + conf->errpipe[1] = -1; +#endif /* Wait for container to tell us whether it started * successfully. 
*/ started = wait_on_daemonized_start(handler, pid_first); +#ifdef HAVE_ISULAD + if (!started) { + size_read = read(conf->errpipe[0], errbuf, BUFSIZ); + if (size_read > 0) { + conf->errmsg = safe_strdup(errbuf); + } + } + close(conf->errpipe[0]); + conf->errpipe[0] = -1; +#endif free_init_cmd(init_cmd); - lxc_put_handler(handler); + lxc_free_handler(handler); return started; } @@ -982,7 +1158,10 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a /* second parent */ if (pid_second != 0) { free_init_cmd(init_cmd); - lxc_put_handler(handler); + lxc_free_handler(handler); +#ifdef HAVE_ISULAD + lxc_close_error_pipe(conf->errpipe); +#endif _exit(EXIT_SUCCESS); } @@ -995,7 +1174,16 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a _exit(EXIT_FAILURE); } - ret = inherit_fds(handler, true); + keepfds[0] = handler->conf->maincmd_fd; + keepfds[1] = handler->state_socket_pair[0]; + keepfds[2] = handler->state_socket_pair[1]; +#ifdef HAVE_ISULAD + keepfds[4] = conf->errpipe[1]; + close(conf->errpipe[0]); + conf->errpipe[0] = -1; +#endif + ret = lxc_check_inherited(conf, true, keepfds, + sizeof(keepfds) / sizeof(keepfds[0])); if (ret < 0) _exit(EXIT_FAILURE); @@ -1013,7 +1201,7 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a } else if (!am_single_threaded()) { ERROR("Cannot start non-daemonized container when threaded"); free_init_cmd(init_cmd); - lxc_put_handler(handler); + lxc_free_handler(handler); return false; } @@ -1027,7 +1215,10 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a w = snprintf(pidstr, sizeof(pidstr), "%d", lxc_raw_getpid()); if (w < 0 || (size_t)w >= sizeof(pidstr)) { free_init_cmd(init_cmd); - lxc_put_handler(handler); + lxc_free_handler(handler); +#ifdef HAVE_ISULAD + lxc_close_error_pipe(conf->errpipe); +#endif SYSERROR("Failed to write monitor pid to \"%s\"", c->pidfile); @@ -1040,7 +1231,10 @@ static bool 
do_lxcapi_start(struct lxc_container *c, int useinit, char * const a ret = lxc_write_to_file(c->pidfile, pidstr, w, false, 0600); if (ret < 0) { free_init_cmd(init_cmd); - lxc_put_handler(handler); + lxc_free_handler(handler); +#ifdef HAVE_ISULAD + lxc_close_error_pipe(conf->errpipe); +#endif SYSERROR("Failed to write monitor pid to \"%s\"", c->pidfile); @@ -1051,6 +1245,19 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a } } +#ifdef HAVE_ISULAD + /* isulad: open exit fifo */ + if (c->exit_fifo) { + conf->exit_fd = lxc_open(c->exit_fifo, O_WRONLY | O_NONBLOCK | O_CLOEXEC, 0); + if (conf->exit_fd < 0) { + ERROR("Failed to open exit fifo %s: %s.", c->exit_fifo, strerror(errno)); + lxc_free_handler(handler); + ret = 1; + goto on_error; + } + } +#endif + conf->reboot = REBOOT_NONE; /* Unshare the mount namespace if requested */ @@ -1058,15 +1265,15 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a ret = unshare(CLONE_NEWNS); if (ret < 0) { SYSERROR("Failed to unshare mount namespace"); - lxc_put_handler(handler); + lxc_free_handler(handler); ret = 1; goto on_error; } ret = mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL); if (ret < 0) { - SYSERROR("Failed to recursively turn root mount tree into dependent mount. 
Continuing..."); - lxc_put_handler(handler); + SYSERROR("Failed to make / rslave at startup"); + lxc_free_handler(handler); ret = 1; goto on_error; } @@ -1075,26 +1282,55 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a reboot: if (conf->reboot == REBOOT_INIT) { /* initialize handler */ - handler = lxc_init_handler(handler, c->name, conf, c->config_path, c->daemonize); + handler = lxc_init_handler(c->name, conf, c->config_path, c->daemonize); if (!handler) { ret = 1; goto on_error; } } - ret = inherit_fds(handler, c->daemonize); + keepfds[0] = handler->conf->maincmd_fd; + keepfds[1] = handler->state_socket_pair[0]; + keepfds[2] = handler->state_socket_pair[1]; + +#ifdef HAVE_ISULAD + /* keep exit fifo fd */ + if (conf->exit_fd >= 0) { + keepfds[3] = conf->exit_fd; + } + /* isulad: keep errpipe fd */ + if (c->daemonize) + keepfds[4] = conf->errpipe[1]; +#endif + + ret = lxc_check_inherited(conf, c->daemonize, keepfds, + sizeof(keepfds) / sizeof(keepfds[0])); if (ret < 0) { - lxc_put_handler(handler); + lxc_free_handler(handler); ret = 1; goto on_error; } - if (useinit) +#ifdef HAVE_ISULAD + if (useinit) { + ret = lxc_execute(c->name, argv, 1, handler, c->config_path, + c->daemonize, &c->error_num, c->start_timeout); + } + else { + handler->disable_pty = c->disable_pty; + handler->open_stdin = c->open_stdin; + ret = lxc_start(argv, handler, c->config_path, c->daemonize, + &c->error_num, c->start_timeout); +#else + if (useinit) { ret = lxc_execute(c->name, argv, 1, handler, c->config_path, c->daemonize, &c->error_num); - else + } + else { ret = lxc_start(argv, handler, c->config_path, c->daemonize, &c->error_num); +#endif + } if (conf->reboot == REBOOT_REQ) { INFO("Container requested reboot"); @@ -1185,6 +1421,7 @@ WRAP_API(bool, lxcapi_stop) static int do_create_container_dir(const char *path, struct lxc_conf *conf) { + __do_free char *p = NULL; int lasterr; int ret = -1; @@ -1200,8 +1437,10 @@ static int 
do_create_container_dir(const char *path, struct lxc_conf *conf) ret = 0; } + p = must_copy_string(path); + if (!lxc_list_empty(&conf->id_map)) { - ret = chown_mapped_root(path, conf); + ret = chown_mapped_root(p, conf); if (ret < 0) ret = -1; } @@ -1345,8 +1584,14 @@ static bool create_run_template(struct lxc_container *c, char *tpath, _exit(EXIT_FAILURE); } - if (detect_shared_rootfs() && mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) - SYSERROR("Failed to recursively turn root mount tree into dependent mount. Continuing..."); + ret = detect_shared_rootfs(); + if (ret == 1) { + ret = mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL); + if (ret < 0) { + SYSERROR("Failed to make \"/\" rslave"); + ERROR("Continuing..."); + } + } } if (strcmp(bdev->type, "dir") != 0 && strcmp(bdev->type, "btrfs") != 0) { @@ -2048,7 +2293,12 @@ WRAP_API_1(bool, lxcapi_reboot2, int) static bool do_lxcapi_shutdown(struct lxc_container *c, int timeout) { __do_close int pidfd = -EBADF, state_client_fd = -EBADF; +#ifdef HAVE_ISULAD + // isulad: keep default signal the same as docker + int haltsignal = SIGTERM; +#else int haltsignal = SIGPWR; +#endif pid_t pid = -1; lxc_state_t states[MAX_STATE] = {0}; int killret, ret; @@ -2064,12 +2314,13 @@ static bool do_lxcapi_shutdown(struct lxc_container *c, int timeout) if (pid <= 0) return true; - /* Detect whether we should send SIGRTMIN + 3 (e.g. systemd). */ if (c->lxc_conf && c->lxc_conf->haltsignal) haltsignal = c->lxc_conf->haltsignal; +#ifndef HAVE_ISULAD + /* Detect whether we should send SIGRTMIN + 3 (e.g. systemd). 
*/ else if (task_blocks_signal(pid, (SIGRTMIN + 3))) haltsignal = (SIGRTMIN + 3); - +#endif /* * Add a new state client before sending the shutdown signal so @@ -2090,41 +2341,41 @@ static bool do_lxcapi_shutdown(struct lxc_container *c, int timeout) if (ret < MAX_STATE) return false; - } - if (pidfd >= 0) { - struct pollfd pidfd_poll = { - .events = POLLIN, - .fd = pidfd, - }; + if (pidfd >= 0) { + struct pollfd pidfd_poll = { + .events = POLLIN, + .fd = pidfd, + }; - killret = lxc_raw_pidfd_send_signal(pidfd, haltsignal, - NULL, 0); - if (killret < 0) - return log_warn(false, "Failed to send signal %d to pidfd %d", - haltsignal, pidfd); + killret = lxc_raw_pidfd_send_signal(pidfd, haltsignal, + NULL, 0); + if (killret < 0) + return log_warn(false, "Failed to send signal %d to pidfd %d", + haltsignal, pidfd); - TRACE("Sent signal %d to pidfd %d", haltsignal, pidfd); + TRACE("Sent signal %d to pidfd %d", haltsignal, pidfd); - /* - * No need for going through all of the state server - * complications anymore. We can just poll on pidfds. :) - */ + /* + * No need for going through all of the state server + * complications anymore. We can just poll on pidfds. 
:) + */ - if (timeout != 0) { - ret = poll(&pidfd_poll, 1, timeout * 1000); - if (ret < 0 || !(pidfd_poll.revents & POLLIN)) - return false; + if (timeout != 0) { + ret = poll(&pidfd_poll, 1, timeout * 1000); + if (ret < 0 || !(pidfd_poll.revents & POLLIN)) + return false; - TRACE("Pidfd polling detected container exit"); - } - } else { - killret = kill(pid, haltsignal); - if (killret < 0) - return log_warn(false, "Failed to send signal %d to pid %d", - haltsignal, pid); + TRACE("Pidfd polling detected container exit"); + } + } else { + killret = kill(pid, haltsignal); + if (killret < 0) + return log_warn(false, "Failed to send signal %d to pid %d", + haltsignal, pid); - TRACE("Sent signal %d to pid %d", haltsignal, pid); + TRACE("Sent signal %d to pid %d", haltsignal, pid); + } } if (timeout == 0) @@ -2948,8 +3199,19 @@ static bool container_destroy(struct lxc_container *c, bool bret = false; int ret = 0; +#ifdef HAVE_ISULAD + if (!c) + return false; + // isulad: if container is not defined, we need to remove disk lock file + // which is created in lxc_container_new. + if (!do_lxcapi_is_defined(c)) { + container_disk_removelock(c); + return false; + } +#else if (!c || !do_lxcapi_is_defined(c)) return false; +#endif conf = c->lxc_conf; if (container_disk_lock(c)) @@ -3069,6 +3331,15 @@ static bool container_destroy(struct lxc_container *c, if (ret < 0) { ERROR("Failed to destroy directory \"%s\" for \"%s\"", path, c->name); +#ifdef HAVE_ISULAD + char msg[BUFSIZ] = { 0 }; + ret = snprintf(msg, BUFSIZ, "Failed to destroy directory \"%s\": %s", path, errno ? 
strerror(errno) : "error"); + if (ret < 0 || ret >= BUFSIZ) { + ERROR("Sprintf failed"); + goto out; + } + c->error_string = safe_strdup(msg); +#endif goto out; } INFO("Destroyed directory \"%s\" for \"%s\"", path, c->name); @@ -3081,13 +3352,23 @@ out: free(path); container_disk_unlock(c); +#ifdef HAVE_ISULAD + if (bret && container_disk_removelock(c)) { + bret = false; + } +#endif return bret; } static bool do_lxcapi_destroy(struct lxc_container *c) { +#ifdef HAVE_ISULAD + if (!c) + return false; +#else if (!c || !lxcapi_is_defined(c)) return false; +#endif if (c->lxc_conf && c->lxc_conf->rootfs.managed) { if (has_snapshots(c)) { @@ -3665,8 +3946,12 @@ static int clone_update_rootfs(struct clone_update_data *data) return -1; } - if (detect_shared_rootfs() && mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) - SYSERROR("Failed to recursively turn root mount tree into dependent mount. Continuing..."); + if (detect_shared_rootfs()) { + if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL)) { + SYSERROR("Failed to make / rslave"); + ERROR("Continuing..."); + } + } if (bdev->ops->mount(bdev) < 0) { storage_put(bdev); @@ -4030,8 +4315,13 @@ static int lxcapi_attach(struct lxc_container *c, current_config = c->lxc_conf; +#ifdef HAVE_ISULAD + ret = lxc_attach(c, exec_function, exec_payload, options, + attached_process, &c->lxc_conf->errmsg); +#else ret = lxc_attach(c, exec_function, exec_payload, options, attached_process); +#endif current_config = NULL; return ret; } @@ -4051,7 +4341,11 @@ static int do_lxcapi_attach_run_wait(struct lxc_container *c, command.program = (char *)program; command.argv = (char **)argv; +#ifdef HAVE_ISULAD + ret = lxc_attach(c, lxc_attach_run_command, &command, options, &pid, NULL); +#else ret = lxc_attach(c, lxc_attach_run_command, &command, options, &pid); +#endif if (ret < 0) return ret; @@ -5230,7 +5524,223 @@ static int do_lxcapi_seccomp_notify_fd(struct lxc_container *c) WRAP_API(int, lxcapi_seccomp_notify_fd) +#ifdef HAVE_ISULAD +/* isulad 
add set console fifos*/ +static bool do_lxcapi_set_terminal_default_fifos(struct lxc_container *c, const char *in, const char *out, const char *err) +{ + struct lxc_conf *conf = NULL; + + if (!c || !c->lxc_conf) + return false; + if (container_mem_lock(c)) { + ERROR("Error getting mem lock"); + return false; + } + + conf = c->lxc_conf; + if (in) { + if (conf->console.init_fifo[0]) + free(conf->console.init_fifo[0]); + conf->console.init_fifo[0] = safe_strdup(in); + } + if (out) { + if (conf->console.init_fifo[1]) + free(conf->console.init_fifo[1]); + conf->console.init_fifo[1] = safe_strdup(out); + } + if (err) { + if (conf->console.init_fifo[2]) + free(conf->console.init_fifo[2]); + conf->console.init_fifo[2] = safe_strdup(err); + } + + container_mem_unlock(c); + return true; +} + +WRAP_API_3(bool, lxcapi_set_terminal_default_fifos, const char *, const char *, const char *) + +/* isulad add set info file path */ +static bool do_lxcapi_set_container_info_file(struct lxc_container *c, const char *info_file) +{ + struct lxc_conf *conf = NULL; + + if (!c || !c->lxc_conf || !info_file) + return false; + if (container_mem_lock(c)) { + ERROR("Error getting mem lock"); + return false; + } + + conf = c->lxc_conf; + if (conf->container_info_file) + free(conf->container_info_file); + conf->container_info_file = safe_strdup(info_file); + + container_mem_unlock(c); + return true; +} + +WRAP_API_1(bool, lxcapi_set_container_info_file, const char *) + +static bool do_lxcapi_want_disable_pty(struct lxc_container *c, bool state) +{ + if (!c || !c->lxc_conf) + return false; + + if (container_mem_lock(c)) + return false; + + c->disable_pty = state; + + container_mem_unlock(c); + + return true; +} + +WRAP_API_1(bool, lxcapi_want_disable_pty, bool) + +static bool do_lxcapi_want_open_stdin(struct lxc_container *c, bool state) +{ + if (!c || !c->lxc_conf) + return false; + + if (container_mem_lock(c)) + return false; + + c->open_stdin = state; + + container_mem_unlock(c); + + return 
true; +} + +WRAP_API_1(bool, lxcapi_want_open_stdin, bool) + +/* isulad add clean resources */ +static bool do_lxcapi_add_terminal_fifo(struct lxc_container *c, const char *in_fifo, const char *out_fifo, const char *err_fifo) +{ + bool ret = true; + + if (!c || !c->lxc_conf) + return false; + if (container_mem_lock(c)) { + ERROR("Error getting mem lock"); + return false; + } + + if (lxc_cmd_set_terminal_fifos(c->name, c->config_path, in_fifo, out_fifo, err_fifo)) { + ERROR("Error set console fifos"); + ret = false; + } + + container_mem_unlock(c); + return ret; +} + +WRAP_API_3(bool, lxcapi_add_terminal_fifo, const char *, const char *, const char *) + +static bool do_lxcapi_set_terminal_winch(struct lxc_container *c, unsigned int height, unsigned int width) +{ + bool ret = true; + + if (!c || !c->lxc_conf) + return false; + if (container_mem_lock(c)) { + ERROR("Error getting mem lock"); + return false; + } + + if (lxc_cmd_set_terminal_winch(c->name, c->config_path, height, width)) { + ERROR("Error set terminal winch"); + ret = false; + } + + container_mem_unlock(c); + return ret; +} + +WRAP_API_2(bool, lxcapi_set_terminal_winch, unsigned int, unsigned int) + +static bool do_lxcapi_set_exec_terminal_winch(struct lxc_container *c, const char *suffix, unsigned int height, unsigned int width) +{ + bool ret = true; + + if (!c || !c->lxc_conf) + return false; + if (container_mem_lock(c)) { + ERROR("Error getting mem lock"); + return false; + } + + if (lxc_exec_cmd_set_terminal_winch(c->name, c->config_path, suffix, height, width)) { + ERROR("Error set terminal winch"); + ret = false; + } + + container_mem_unlock(c); + return ret; +} + +WRAP_API_3(bool, lxcapi_set_exec_terminal_winch, const char *, unsigned int, unsigned int) + +/* isulad add clean resources */ +static bool do_lxcapi_clean_container_resource(struct lxc_container *c, pid_t pid) +{ + int ret; + + if (!c) + return false; + + ret = do_lxcapi_clean_resource(c->name, c->config_path, c->lxc_conf, pid); + if 
(ret) + ERROR("Failed to clean container %s resource", c->name); + return ret == 0; + +} + +WRAP_API_1(bool, lxcapi_clean_container_resource, pid_t) + +/* isulad get coantainer pids */ +static bool do_lxcapi_get_container_pids(struct lxc_container *c, pid_t **pids,size_t *pids_len) +{ + int ret; + + if (!c) + return false; + + ret = do_lxcapi_get_pids(c->name, c->config_path, c->lxc_conf, pids,pids_len); + if (ret) + ERROR("Failed to get container %s pids", c->name); + return ret == 0; + +} + +WRAP_API_2(bool, lxcapi_get_container_pids, pid_t **,size_t *) + +/* isulad add start timeout */ +static bool do_lxcapi_set_start_timeout(struct lxc_container *c, unsigned int start_timeout) +{ + if (!c || !c->lxc_conf) + return false; + if (container_mem_lock(c)) { + ERROR("Error getting mem lock"); + return false; + } + c->start_timeout = start_timeout; + container_mem_unlock(c); + return true; +} + +WRAP_API_1(bool, lxcapi_set_start_timeout, unsigned int) + +#endif + +#ifdef HAVE_ISULAD +static struct lxc_container *do_lxc_container_new(const char *name, const char *configpath, bool load_config) +#else struct lxc_container *lxc_container_new(const char *name, const char *configpath) +#endif { struct lxc_container *c; size_t len; @@ -5283,10 +5793,24 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath goto err; } +#ifdef HAVE_ISULAD + if (!set_oci_hook_config_filename(c)) { + fprintf(stderr, "Error allocating oci hooks file pathname\n"); + goto err; + } + + if (load_config && file_exists(c->configfile)) { + if (!lxcapi_load_config(c, NULL)) { + fprintf(stderr, "Failed to load config for %s\n", name); + goto err; + } + } +#else if (file_exists(c->configfile) && !lxcapi_load_config(c, NULL)) { fprintf(stderr, "Failed to load config for %s\n", name); goto err; } +#endif rc = ongoing_create(c); switch (rc) { @@ -5371,7 +5895,18 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath c->mount = lxcapi_mount; c->umount = 
lxcapi_umount; c->seccomp_notify_fd = lxcapi_seccomp_notify_fd; - +#ifdef HAVE_ISULAD + c->set_container_info_file = lxcapi_set_container_info_file; + c->set_terminal_init_fifos = lxcapi_set_terminal_default_fifos; + c->add_terminal_fifos = lxcapi_add_terminal_fifo; + c->set_terminal_winch = lxcapi_set_terminal_winch; + c->set_exec_terminal_winch = lxcapi_set_exec_terminal_winch; + c->want_disable_pty = lxcapi_want_disable_pty; + c->want_open_stdin = lxcapi_want_open_stdin; + c->clean_container_resource = lxcapi_clean_container_resource; + c->get_container_pids = lxcapi_get_container_pids; + c->set_start_timeout = lxcapi_set_start_timeout; +#endif return c; err: @@ -5379,6 +5914,19 @@ err: return NULL; } +#ifdef HAVE_ISULAD +// isulad: new container without load config to save time +struct lxc_container *lxc_container_without_config_new(const char *name, const char *configpath) +{ + return do_lxc_container_new(name, configpath, false); +} + +struct lxc_container *lxc_container_new(const char *name, const char *configpath) +{ + return do_lxc_container_new(name, configpath, true); +} +#endif + int lxc_get_wait_states(const char **states) { int i; @@ -5557,11 +6105,21 @@ int list_active_containers(const char *lxcpath, char ***nret, continue; } +#ifdef HAVE_ISULAD + if (ct_name && ct_name_cnt) { + if (array_contains(&ct_name, p, ct_name_cnt)) { + if (is_hashed) + free(p); + continue; + } + } +#else if (array_contains(&ct_name, p, ct_name_cnt)) { if (is_hashed) free(p); continue; } +#endif if (!add_to_array(&ct_name, p, ct_name_cnt)) { if (is_hashed) diff --git a/src/lxc/lxccontainer.h b/src/lxc/lxccontainer.h index b4ec1d6d5..2951ac7b4 100644 --- a/src/lxc/lxccontainer.h +++ b/src/lxc/lxccontainer.h @@ -90,7 +90,7 @@ struct lxc_container { * \private * Container configuration. * - * \internal TODO: do we want the whole lxc_handler? + * \internal FIXME: do we want the whole lxc_handler? */ struct lxc_conf *lxc_conf; @@ -107,6 +107,30 @@ struct lxc_container { /*! 
Full path to configuration file */ char *config_path; + /*! isulad: + * \private + * exit FIFO File to open used monitor the state of lxc monitor process. + */ + char *exit_fifo; + /*! Whether container wishes to create pty or pipes for console log */ + bool disable_pty; + + /*! Whether container wishes to keep stdin active */ + bool open_stdin; + + /*! + * \private + * isulad: support oci hook from json file + * full path of json file + * */ + char *ocihookfile; + + /*! isulad: + * \private + * start_timeout. + */ + unsigned int start_timeout; + /*! * \brief Determine if \c /var/lib/lxc/$name/config exists. * @@ -563,7 +587,7 @@ struct lxc_container { * \param c Container. * \param[in,out] ttynum Terminal number to attempt to allocate, * or \c -1 to allocate the first available tty. - * \param[out] ptmxfd File descriptor referring to the ptmx side of the pty. + * \param[out] masterfd File descriptor referring to the master side of the pty. * * \return tty file descriptor number on success, or \c -1 on * failure. @@ -575,7 +599,7 @@ struct lxc_container { * descriptor when no longer required so that it may be allocated * by another caller. */ - int (*console_getfd)(struct lxc_container *c, int *ttynum, int *ptmxfd); + int (*console_getfd)(struct lxc_container *c, int *ttynum, int *masterfd); /*! * \brief Allocate and run a console tty. @@ -865,6 +889,93 @@ struct lxc_container { * \return pidfd of init process of the container. */ int (*init_pidfd)(struct lxc_container *c); + + /*! isulad add + * \brief An API call to set the path of info file + * + * \param c Container. + * \param info_file Value of the path of info file. + * + * \return \c true on success, else \c false. + */ + bool (*set_container_info_file) (struct lxc_container *c, const char *info_file); + + /*! isulad add + * \brief An API call to change the path of the console default fifos + * + * \param c Container. + * \param path Value of the console path. 
+ * + * \return \c true on success, else \c false. + */ + bool (*set_terminal_init_fifos)(struct lxc_container *c, const char *in, const char *out, const char *err); + + /*! isulad add + * \brief An API call to add the path of terminal fifos + * + * \param c Container. + * \param path Value of the console path.. + * + * \return \c true on success, else \c false. + */ + bool (*add_terminal_fifos)(struct lxc_container *c, const char *in, const char *out, const char *err); + + bool (*set_terminal_winch)(struct lxc_container *c, unsigned int height, unsigned int width); + + bool (*set_exec_terminal_winch)(struct lxc_container *c, const char *suffix, unsigned int height, unsigned int width); + + /*! + * \brief Change whether the container wants to create pty or pipes + * from the console log. + * + * \param c Container. + * \param state Value for the disable pty bit (0 or 1). + * + * \return \c true on success, else \c false. + */ + bool (*want_disable_pty)(struct lxc_container *c, bool state); + + /*! + * \brief Change whether the container wants to keep stdin active + * for parent process of container + * + * \param c Container. + * \param state Value for the open_stdin bit (0 or 1). + * + * \return \c true on success, else \c false. + */ + bool (*want_open_stdin)(struct lxc_container *c, bool state); + + /*! isulad add + * \brief An API call to clean resources of container + * + * \param c Container. + * \param pid Value of container process. + * + * \return \c true on success, else \c false. + */ + bool (*clean_container_resource) (struct lxc_container *c, pid_t pid); + + /*! isulad add + * \brief An API call to get container pids + * + * \param c Container. + * \param pids Value of container pids. + * \param pids_len Value of container pids len. + * \param pid Value of container pid. + * \return \c true on success, else \c false. + */ + bool (*get_container_pids)(struct lxc_container *c,pid_t **pids,size_t *pids_len); + + /*! 
isulad add + * \brief An API call to set start timeout + * + * \param c Container. + * \param start_timeout Value of start timeout. + * + * \return \c true on success, else \c false. + */ + bool (*set_start_timeout)(struct lxc_container *c, unsigned int start_timeout); }; /*! @@ -998,6 +1109,18 @@ struct lxc_console_log { */ struct lxc_container *lxc_container_new(const char *name, const char *configpath); +/*! + * \brief Create a new container without loading config. + * + * \param name Name to use for container. + * \param configpath Full path to configuration file to use. + * + * \return Newly-allocated container, or \c NULL on error. + * + * \note This function can only used for listing container. + */ +struct lxc_container *lxc_container_without_config_new(const char *name, const char *configpath); + /*! * \brief Add a reference to the specified container. * diff --git a/src/lxc/lxclock.c b/src/lxc/lxclock.c index 318e5bf5a..bb0dca0c9 100644 --- a/src/lxc/lxclock.c +++ b/src/lxc/lxclock.c @@ -370,3 +370,30 @@ void container_disk_unlock(struct lxc_container *c) lxcunlock(c->slock); lxcunlock(c->privlock); } + +#ifdef HAVE_ISULAD +static int lxc_removelock(struct lxc_lock *l) +{ + int ret = 0; + + if (l->type == LXC_LOCK_FLOCK) { + ret = unlink(l->u.f.fname); + if (ret && errno != ENOENT) { + SYSERROR("Error unlink %s", l->u.f.fname); + return ret; + } + } + + return ret; +} + +int container_disk_removelock(struct lxc_container *c) +{ + int ret; + + ret = lxc_removelock(c->slock); + if (ret) + return ret; + return lxc_removelock(c->privlock); +} +#endif diff --git a/src/lxc/lxclock.h b/src/lxc/lxclock.h index 9f9bc3bf6..6a71d7c5e 100644 --- a/src/lxc/lxclock.h +++ b/src/lxc/lxclock.h @@ -154,4 +154,8 @@ extern int container_disk_lock(struct lxc_container *c); */ extern void container_disk_unlock(struct lxc_container *c); +#ifdef HAVE_ISULAD +int container_disk_removelock(struct lxc_container *c); +#endif + #endif diff --git a/src/lxc/macro.h b/src/lxc/macro.h 
index 7b2ad79ed..3df19d6d3 100644 --- a/src/lxc/macro.h +++ b/src/lxc/macro.h @@ -57,20 +57,6 @@ #define CAP_SETGID 6 #endif -/* move_mount */ -#ifndef MOVE_MOUNT_F_EMPTY_PATH -#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ -#endif - -/* open_tree */ -#ifndef OPEN_TREE_CLONE -#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */ -#endif - -#ifndef OPEN_TREE_CLOEXEC -#define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */ -#endif - /* prctl */ #ifndef PR_CAPBSET_READ #define PR_CAPBSET_READ 23 @@ -433,9 +419,6 @@ enum { #define PTR_TO_UINT64(p) ((uint64_t)((intptr_t)(p))) -#define UINT_TO_PTR(u) ((void *) ((uintptr_t) (u))) -#define PTR_TO_USHORT(p) ((unsigned short)((uintptr_t)(p))) - #define LXC_INVALID_UID ((uid_t)-1) #define LXC_INVALID_GID ((gid_t)-1) diff --git a/src/lxc/mainloop.c b/src/lxc/mainloop.c index d5ae2a67a..6d4c5935a 100644 --- a/src/lxc/mainloop.c +++ b/src/lxc/mainloop.c @@ -59,10 +59,8 @@ int lxc_mainloop(struct lxc_epoll_descr *descr, int timeout_ms) } } -int lxc_mainloop_add_handler_events(struct lxc_epoll_descr *descr, int fd, - int events, - lxc_mainloop_callback_t callback, - void *data) +int lxc_mainloop_add_handler(struct lxc_epoll_descr *descr, int fd, + lxc_mainloop_callback_t callback, void *data) { __do_free struct mainloop_handler *handler = NULL; __do_free struct lxc_list *item = NULL; @@ -79,7 +77,7 @@ int lxc_mainloop_add_handler_events(struct lxc_epoll_descr *descr, int fd, handler->fd = fd; handler->data = data; - ev.events = events; + ev.events = EPOLLIN; ev.data.ptr = handler; if (epoll_ctl(descr->epfd, EPOLL_CTL_ADD, fd, &ev) < 0) @@ -94,13 +92,6 @@ int lxc_mainloop_add_handler_events(struct lxc_epoll_descr *descr, int fd, return 0; } -int lxc_mainloop_add_handler(struct lxc_epoll_descr *descr, int fd, - lxc_mainloop_callback_t callback, void *data) -{ - return lxc_mainloop_add_handler_events(descr, fd, EPOLLIN, callback, - data); -} - int 
lxc_mainloop_del_handler(struct lxc_epoll_descr *descr, int fd) { struct mainloop_handler *handler; diff --git a/src/lxc/mainloop.h b/src/lxc/mainloop.h index e6ab9a6d9..8afac60d3 100644 --- a/src/lxc/mainloop.h +++ b/src/lxc/mainloop.h @@ -22,10 +22,6 @@ typedef int (*lxc_mainloop_callback_t)(int fd, uint32_t event, void *data, extern int lxc_mainloop(struct lxc_epoll_descr *descr, int timeout_ms); -extern int lxc_mainloop_add_handler_events(struct lxc_epoll_descr *descr, - int fd, int events, - lxc_mainloop_callback_t callback, - void *data); extern int lxc_mainloop_add_handler(struct lxc_epoll_descr *descr, int fd, lxc_mainloop_callback_t callback, void *data); diff --git a/src/lxc/memory_utils.h b/src/lxc/memory_utils.h index d3b68a1e9..29878fb67 100644 --- a/src/lxc/memory_utils.h +++ b/src/lxc/memory_utils.h @@ -41,10 +41,10 @@ define_cleanup_function(FILE *, fclose); define_cleanup_function(DIR *, closedir); #define __do_closedir call_cleaner(closedir) -#define free_disarm(ptr) \ - ({ \ - free(ptr); \ - ptr = NULL; \ +#define free_disarm(ptr) \ + ({ \ + free(ptr); \ + move_ptr(ptr); \ }) static inline void free_disarm_function(void *ptr) diff --git a/src/lxc/namespace.c b/src/lxc/namespace.c index f2e017563..38d2ae5d7 100644 --- a/src/lxc/namespace.c +++ b/src/lxc/namespace.c @@ -21,6 +21,33 @@ lxc_log_define(namespace, lxc); +/* + * Let's use the "standard stack limit" (i.e. glibc thread size default) for + * stack sizes: 8MB. 
+ */ +#define __LXC_STACK_SIZE (8 * 1024 * 1024) +pid_t lxc_clone(int (*fn)(void *), void *arg, int flags, int *pidfd) +{ + pid_t ret; + void *stack; + + stack = malloc(__LXC_STACK_SIZE); + if (!stack) { + SYSERROR("Failed to allocate clone stack"); + return -ENOMEM; + } + +#ifdef __ia64__ + ret = __clone2(fn, stack, __LXC_STACK_SIZE, flags | SIGCHLD, arg, pidfd); +#else + ret = clone(fn, stack + __LXC_STACK_SIZE, flags | SIGCHLD, arg, pidfd); +#endif + if (ret < 0) + SYSERROR("Failed to clone (%#x)", flags); + + return ret; +} + /* Leave the user namespace at the first position in the array of structs so * that we always attach to it first when iterating over the struct and using * setns() to switch namespaces. This especially affects lxc_attach(): Suppose diff --git a/src/lxc/namespace.h b/src/lxc/namespace.h index 84976f60f..a8fda783c 100644 --- a/src/lxc/namespace.h +++ b/src/lxc/namespace.h @@ -7,6 +7,63 @@ #include #include +#ifndef CLONE_PARENT_SETTID +#define CLONE_PARENT_SETTID 0x00100000 +#endif + +#ifndef CLONE_CHILD_CLEARTID +#define CLONE_CHILD_CLEARTID 0x00200000 +#endif + +#ifndef CLONE_CHILD_SETTID +#define CLONE_CHILD_SETTID 0x01000000 +#endif + +#ifndef CLONE_VFORK +#define CLONE_VFORK 0x00004000 +#endif + +#ifndef CLONE_THREAD +#define CLONE_THREAD 0x00010000 +#endif + +#ifndef CLONE_SETTLS +#define CLONE_SETTLS 0x00080000 +#endif + +#ifndef CLONE_VM +#define CLONE_VM 0x00000100 +#endif + +#ifndef CLONE_FILES +#define CLONE_FILES 0x00000400 +#endif + +#ifndef CLONE_FS +# define CLONE_FS 0x00000200 +#endif +#ifndef CLONE_NEWNS +# define CLONE_NEWNS 0x00020000 +#endif +#ifndef CLONE_NEWCGROUP +# define CLONE_NEWCGROUP 0x02000000 +#endif +#ifndef CLONE_NEWUTS +# define CLONE_NEWUTS 0x04000000 +#endif +#ifndef CLONE_NEWIPC +# define CLONE_NEWIPC 0x08000000 +#endif +#ifndef CLONE_NEWUSER +# define CLONE_NEWUSER 0x10000000 +#endif +#ifndef CLONE_NEWPID +# define CLONE_NEWPID 0x20000000 +#endif +#ifndef CLONE_NEWNET +# define CLONE_NEWNET 0x40000000 
+#endif + enum { LXC_NS_USER, LXC_NS_MNT, @@ -25,6 +82,39 @@ extern const struct ns_info { const char *env_name; } ns_info[LXC_NS_MAX]; +#if defined(__ia64__) +int __clone2(int (*__fn) (void *__arg), void *__child_stack_base, + size_t __child_stack_size, int __flags, void *__arg, ...); +#else +int clone(int (*fn)(void *), void *child_stack, + int flags, void *arg, ... + /* pid_t *ptid, struct user_desc *tls, pid_t *ctid */ ); +#endif + +/** + * lxc_clone() - create a new process + * + * - allocate stack: + * This function allocates a new stack the size of page and passes it to the + * kernel. + * + * - support all CLONE_*flags: + * This function supports all CLONE_* flags. If in doubt or not sufficiently + * familiar with process creation in the kernel and interactions with libcs + * this function should be used. + * + * - pthread_atfork() handlers depending on libc: + * Whether this function runs pthread_atfork() handlers depends on the + * corresponding libc wrapper. glibc currently does not run pthread_atfork() + * handlers but does not guarantee that they are not. Other libcs might or + * might not run pthread_atfork() handlers. If you require guarantees please + * refer to the lxc_raw_clone*() functions in raw_syscalls.{c,h}. + * + * - should call lxc_raw_getpid(): + * The child should use lxc_raw_getpid() to retrieve its pid. 
+ */ +extern pid_t lxc_clone(int (*fn)(void *), void *arg, int flags, int *pidfd); + extern int lxc_namespace_2_cloneflag(const char *namespace); extern int lxc_namespace_2_ns_idx(const char *namespace); extern int lxc_namespace_2_std_identifiers(char *namespaces); diff --git a/src/lxc/network.c b/src/lxc/network.c index bca044059..19adb2329 100644 --- a/src/lxc/network.c +++ b/src/lxc/network.c @@ -36,7 +36,7 @@ #include "memory_utils.h" #include "network.h" #include "nl.h" -#include "process_utils.h" +#include "raw_syscalls.h" #include "syscall_wrappers.h" #include "utils.h" @@ -182,6 +182,11 @@ static int setup_ipv6_addr_routes(struct lxc_list *ip, int ifindex) return 0; } +struct ip_proxy_args { + const char *ip; + const char *dev; +}; + static int lxc_ip_neigh_proxy(__u16 nlmsg_type, int family, int ifindex, void *dest) { call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL; @@ -319,15 +324,11 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd } if (!is_empty_string(netdev->link) && netdev->priv.veth_attr.mode == VETH_MODE_BRIDGE) { - if (!lxc_nic_exists(netdev->link)) { - SYSERROR("Failed to attach \"%s\" to bridge \"%s\", bridge interface doesn't exist", veth1, netdev->link); - goto out_delete; - } - err = lxc_bridge_attach(netdev->link, veth1); if (err) { errno = -err; - SYSERROR("Failed to attach \"%s\" to bridge \"%s\"", veth1, netdev->link); + SYSERROR("Failed to attach \"%s\" to bridge \"%s\"", + veth1, netdev->link); goto out_delete; } INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link); @@ -482,6 +483,8 @@ static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *n } strlcpy(netdev->created_name, peer, IFNAMSIZ); + if (is_empty_string(netdev->name)) + (void)strlcpy(netdev->name, peer, IFNAMSIZ); netdev->ifindex = if_nametoindex(peer); if (!netdev->ifindex) { @@ -531,7 +534,7 @@ on_error: return -1; } -static int lxc_ipvlan_create(const char *parent, const char *name, int 
mode, int isolation) +static int lxc_ipvlan_create(const char *master, const char *name, int mode, int isolation) { call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL; struct nl_handler nlh; @@ -540,7 +543,7 @@ static int lxc_ipvlan_create(const char *parent, const char *name, int mode, int struct ifinfomsg *ifi; struct rtattr *nest, *nest2; - len = strlen(parent); + len = strlen(master); if (len == 1 || len >= IFNAMSIZ) return ret_errno(EINVAL); @@ -548,13 +551,13 @@ static int lxc_ipvlan_create(const char *parent, const char *name, int mode, int if (len == 1 || len >= IFNAMSIZ) return ret_errno(EINVAL); - index = if_nametoindex(parent); + index = if_nametoindex(master); if (!index) return ret_errno(EINVAL); err = netlink_open(nlh_ptr, NETLINK_ROUTE); if (err) - return err; + return ret_errno(-err); nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE); if (!nlmsg) @@ -579,21 +582,24 @@ static int lxc_ipvlan_create(const char *parent, const char *name, int mode, int if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan")) return ret_errno(EPROTO); - nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA); - if (!nest2) - return ret_errno(EPROTO); + if (mode) { + nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA); + if (!nest2) + return ret_errno(EPROTO); - if (nla_put_u16(nlmsg, IFLA_IPVLAN_MODE, mode)) - return ret_errno(EPROTO); + if (nla_put_u32(nlmsg, IFLA_IPVLAN_MODE, mode)) + return ret_errno(EPROTO); - /* if_link.h does not define the isolation flag value for bridge mode (unlike IPVLAN_F_PRIVATE and - * IPVLAN_F_VEPA) so we define it as 0 and only send mode if mode >0 as default mode is bridge anyway - * according to ipvlan docs. - */ - if (isolation > 0 && nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation)) - return ret_errno(EPROTO); + /* if_link.h does not define the isolation flag value for bridge mode so we define it as 0 + * and only send mode if mode >0 as default mode is bridge anyway according to ipvlan docs. 
+ */ + if (isolation > 0 && + nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation)) + return ret_errno(EPROTO); + + nla_end_nested(nlmsg, nest2); + } - nla_end_nested(nlmsg, nest2); nla_end_nested(nlmsg, nest); if (nla_put_u32(nlmsg, IFLA_LINK, index)) @@ -631,6 +637,8 @@ static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *ne } strlcpy(netdev->created_name, peer, IFNAMSIZ); + if (is_empty_string(netdev->name)) + (void)strlcpy(netdev->name, peer, IFNAMSIZ); netdev->ifindex = if_nametoindex(peer); if (!netdev->ifindex) { @@ -704,6 +712,8 @@ static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netd } strlcpy(netdev->created_name, peer, IFNAMSIZ); + if (is_empty_string(netdev->name)) + (void)strlcpy(netdev->name, peer, IFNAMSIZ); netdev->ifindex = if_nametoindex(peer); if (!netdev->ifindex) { @@ -859,7 +869,7 @@ static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = { [LXC_NET_NONE] = instantiate_none, }; -static int __instantiate_ns_common(struct lxc_netdev *netdev) +static int instantiate_ns_veth(struct lxc_netdev *netdev) { char current_ifname[IFNAMSIZ]; @@ -901,30 +911,33 @@ static int __instantiate_ns_common(struct lxc_netdev *netdev) return 0; } -static int instantiate_ns_veth(struct lxc_netdev *netdev) +static int __instantiate_common(struct lxc_netdev *netdev) { + netdev->ifindex = if_nametoindex(netdev->name); + if (!netdev->ifindex) + return log_error_errno(-1, errno, "Failed to retrieve ifindex for network device with name %s", netdev->name); - return __instantiate_ns_common(netdev); + return 0; } static int instantiate_ns_macvlan(struct lxc_netdev *netdev) { - return __instantiate_ns_common(netdev); + return __instantiate_common(netdev); } static int instantiate_ns_ipvlan(struct lxc_netdev *netdev) { - return __instantiate_ns_common(netdev); + return __instantiate_common(netdev); } static int instantiate_ns_vlan(struct lxc_netdev *netdev) { - return __instantiate_ns_common(netdev); + return 
__instantiate_common(netdev); } static int instantiate_ns_phys(struct lxc_netdev *netdev) { - return __instantiate_ns_common(netdev); + return __instantiate_common(netdev); } static int instantiate_ns_empty(struct lxc_netdev *netdev) @@ -1736,7 +1749,7 @@ int lxc_veth_create(const char *name1, const char *name2, pid_t pid, unsigned in } /* TODO: merge with lxc_macvlan_create */ -int lxc_vlan_create(const char *parent, const char *name, unsigned short vlanid) +int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid) { call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL; struct nl_handler nlh; @@ -1749,7 +1762,7 @@ int lxc_vlan_create(const char *parent, const char *name, unsigned short vlanid) if (err) return err; - len = strlen(parent); + len = strlen(master); if (len == 1 || len >= IFNAMSIZ) return ret_errno(EINVAL); @@ -1765,7 +1778,7 @@ int lxc_vlan_create(const char *parent, const char *name, unsigned short vlanid) if (!answer) return ret_errno(ENOMEM); - lindex = if_nametoindex(parent); + lindex = if_nametoindex(master); if (!lindex) return ret_errno(EINVAL); @@ -1804,7 +1817,7 @@ int lxc_vlan_create(const char *parent, const char *name, unsigned short vlanid) return netlink_transaction(nlh_ptr, nlmsg, answer); } -int lxc_macvlan_create(const char *parent, const char *name, int mode) +int lxc_macvlan_create(const char *master, const char *name, int mode) { call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL; struct nl_handler nlh; @@ -1817,7 +1830,7 @@ int lxc_macvlan_create(const char *parent, const char *name, int mode) if (err) return err; - len = strlen(parent); + len = strlen(master); if (len == 1 || len >= IFNAMSIZ) return ret_errno(EINVAL); @@ -1833,7 +1846,7 @@ int lxc_macvlan_create(const char *parent, const char *name, int mode) if (!answer) return ret_errno(ENOMEM); - index = if_nametoindex(parent); + index = if_nametoindex(master); if (!index) return ret_errno(EINVAL); @@ -2834,9 +2847,6 @@ 
bool lxc_delete_network_unpriv(struct lxc_handler *handler) netdev->ifindex, netdev->link); ret = netdev_deconf[netdev->type](handler, netdev); - if (ret < 0) - WARN("Failed to deconfigure interface with index %d and initial name \"%s\"", - netdev->ifindex, netdev->link); goto clear_ifindices; } @@ -3110,9 +3120,9 @@ int lxc_network_move_created_netdev_priv(struct lxc_handler *handler) physname = is_wlan(netdev->link); if (physname) - ret = lxc_netdev_move_wlan(physname, netdev->link, pid, NULL); + ret = lxc_netdev_move_wlan(physname, netdev->link, pid, netdev->name); else - ret = lxc_netdev_move_by_index(netdev->ifindex, pid, NULL); + ret = lxc_netdev_move_by_index(netdev->ifindex, pid, netdev->name); if (ret) return log_error_errno(-1, -ret, "Failed to move network device \"%s\" with ifindex %d to network namespace %d", netdev->created_name, @@ -3219,9 +3229,6 @@ bool lxc_delete_network_priv(struct lxc_handler *handler) } ret = netdev_deconf[netdev->type](handler, netdev); - if (ret < 0) - WARN("Failed to deconfigure interface with index %d and initial name \"%s\"", - netdev->ifindex, netdev->link); goto clear_ifindices; } @@ -3441,10 +3448,18 @@ static int lxc_network_setup_in_child_namespaces_common(struct lxc_netdev *netde /* set the network device up */ if (netdev->flags & IFF_UP) { + +#ifdef HAVE_ISULAD + if (netdev->name[0] != '\0') { + err = lxc_netdev_up(netdev->name); + if (err) + return log_error_errno(-1, -err, "Failed to set network device \"%s\" up", netdev->name); + } +#else err = lxc_netdev_up(netdev->name); if (err) return log_error_errno(-1, -err, "Failed to set network device \"%s\" up", netdev->name); - +#endif /* the network is up, make the loopback up too */ err = lxc_netdev_up("lo"); if (err) diff --git a/src/lxc/network.h b/src/lxc/network.h index ba35c1253..696380c90 100644 --- a/src/lxc/network.h +++ b/src/lxc/network.h @@ -205,8 +205,8 @@ extern int lxc_netdev_set_mtu(const char *name, int mtu); /* Create a virtual network devices. 
*/ extern int lxc_veth_create(const char *name1, const char *name2, pid_t pid, unsigned int mtu); -extern int lxc_macvlan_create(const char *parent, const char *name, int mode); -extern int lxc_vlan_create(const char *parent, const char *name, +extern int lxc_macvlan_create(const char *master, const char *name, int mode); +extern int lxc_vlan_create(const char *master, const char *name, unsigned short vid); /* Set ip address. */ diff --git a/src/lxc/path.c b/src/lxc/path.c new file mode 100644 index 000000000..65b8aadbf --- /dev/null +++ b/src/lxc/path.c @@ -0,0 +1,655 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2020. Allrights reserved + * Description: isulad utils + * Author: lifeng + * Create: 2020-04-11 +******************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "path.h" +#include "log.h" +#include "isulad_utils.h" + +lxc_log_define(lxc_path_ui, lxc); + +#define ISSLASH(C) ((C) == '/') +#define IS_ABSOLUTE_FILE_NAME(F) (ISSLASH ((F)[0])) +#define IS_RELATIVE_FILE_NAME(F) (! IS_ABSOLUTE_FILE_NAME (F)) + +bool specify_current_dir(const char *path) +{ + char *basec = NULL, *bname = NULL; + bool res = false; + + basec = safe_strdup(path); + + bname = basename(basec); + if (bname == NULL) { + free(basec); + ERROR("Out of memory"); + return false; + } + res = !strcmp(bname, "."); + free(basec); + return res; +} + +bool has_traling_path_separator(const char *path) +{ + return path && strlen(path) && (path[strlen(path) - 1] == '/'); +} + +// PreserveTrailingDotOrSeparator returns the given cleaned path +// and appends a trailing `/.` or `/` if its corresponding original +// path ends with a trailing `/.` or `/`. If the cleaned +// path already ends in a `.` path segment, then another is not added. 
If the +// clean path already ends in a path separator, then another is not added. +char *preserve_trailing_dot_or_separator(const char *cleanedpath, + const char *originalpath) +{ + char *respath = NULL; + size_t len; + + if (strlen(cleanedpath) > (SIZE_MAX - 3)) { + return NULL; + } + + len = strlen(cleanedpath) + 3; + respath = malloc(len); + if (respath == NULL) { + ERROR("Out of memory"); + return NULL; + } + memset(respath, 0x00, len); + strcat(respath, cleanedpath); + + if (!specify_current_dir(cleanedpath) && specify_current_dir(originalpath)) { + if (!has_traling_path_separator(respath)) + strcat(respath, "/"); + strcat(respath, "."); + } + + if (!has_traling_path_separator(respath) && + has_traling_path_separator(originalpath)) + strcat(respath, "/"); + + return respath; +} + + +// Split splits path immediately following the final Separator, +// separating it into a directory and file name component. +// If there is no Separator in path, Split returns an empty dir +// and file set to path. +// The returned values have the property that path = dir+file. +bool filepath_split(const char *path, char **dir, char **base) +{ + ssize_t i; + size_t len; + + len = strlen(path); + if (len >= PATH_MAX) { + ERROR("Invalid path"); + return false; + } + i = len - 1; + while (i >= 0 && path[i] != '/') + i--; + + *dir = malloc(i + 2); + if (*dir == NULL) { + ERROR("Out of memory"); + return false; + } + memcpy(*dir, path, i + 1); + *(*dir + i + 1) = '\0'; + + *base = safe_strdup(path + i + 1); + + return true; +} + + +static bool do_clean_path_continue(const char *endpos, const char *stpos, const char *respath, char **dst) +{ /* Handles a "." or ".." component in [stpos, endpos): returns true when the component was consumed here (the caller must not copy it), false for any other component. */ + if (endpos - stpos == 1 && stpos[0] == '.') { /* ".": output is left untouched */ + return true; + } else if (endpos - stpos == 2 && stpos[0] == '.'
&& stpos[1] == '.') { + char *dest = *dst; + if (dest <= respath + 1) { /* output is only the leading '/': nothing to pop */ + return true; + } + for (--dest; dest > respath && !ISSLASH(dest[-1]); --dest) { + /* ".." removes the whole previous component: keep rewinding until just after its leading '/' */ + continue; + } + *dst = dest; + return true; + } + return false; +} + +int do_clean_path(const char *respath, const char *limit_respath, + const char *stpos, char **dst) +{ + char *dest = *dst; + const char *endpos = NULL; + + for (endpos = stpos; *stpos; stpos = endpos) { + while (ISSLASH(*stpos)) { + ++stpos; + } + + for (endpos = stpos; *endpos && !ISSLASH(*endpos); ++endpos) { + } + + if (endpos - stpos == 0) { + break; + } else if (do_clean_path_continue(endpos, stpos, respath, &dest)) { + continue; + } + + if (!ISSLASH(dest[-1])) { + *dest++ = '/'; + } + + if (dest + (endpos - stpos) >= limit_respath) { + ERROR("Path is too long"); + if (dest > respath + 1) { + dest--; + } + *dest = '\0'; + return -1; + } + + memcpy(dest, stpos, (size_t)(endpos - stpos)); + dest += endpos - stpos; + *dest = '\0'; + } + *dst = dest; + return 0; +} + +char *cleanpath(const char *path, char *realpath, size_t realpath_len) +{ + char *respath = NULL; + char *dest = NULL; + const char *stpos = NULL; + const char *limit_respath = NULL; + + if (path == NULL || path[0] == '\0' || \ + realpath == NULL || (realpath_len < PATH_MAX)) { + return NULL; + } + + respath = realpath; + + memset(respath, 0, realpath_len); + limit_respath = respath + PATH_MAX; + + if (!IS_ABSOLUTE_FILE_NAME(path)) { + if (!getcwd(respath, PATH_MAX)) { + ERROR("Failed to getcwd"); + respath[0] = '\0'; + goto error; + } + dest = strchr(respath, '\0'); + if (dest == NULL) { + ERROR("Failed to get the end of respath"); + goto error; + } + if (strlen(path) > (PATH_MAX - strlen(respath) - 1)) { + ERROR("Path is too long"); + goto error; + } + strcat(respath, path); + stpos = path; + } else { + dest = respath; + *dest++ = '/'; + stpos = path; + } + + if (do_clean_path(respath, limit_respath, stpos, &dest)) { + goto error; + } + + if (dest > respath +
1 && ISSLASH(dest[-1])) { + --dest; + } + *dest = '\0'; + + return respath; + +error: + return NULL; +} + +static int do_path_realloc(const char *start, const char *end, + char **rpath, char **dest, const char **rpath_limit) +{ /* Grows *rpath when appending [start, end) at *dest would reach *rpath_limit, then rebases *dest and *rpath_limit onto the new buffer. Returns 0 on success (or when no growth is needed), -1 on failure. */ + long long dest_offset = *dest - *rpath; + char *new_rpath = NULL; + size_t new_size; + int nret = 0; + size_t gap = 0; + + if (*dest + (end - start) < *rpath_limit) { + return 0; + } + + gap = (size_t)(end - start) + 1; + new_size = (size_t)(*rpath_limit - *rpath); + if (new_size > SIZE_MAX - gap) { + ERROR("Out of range!"); + return -1; + } + + if (gap > PATH_MAX) { + new_size += gap; + } else { + new_size += PATH_MAX; + } + nret = lxc_mem_realloc((void **)&new_rpath, new_size, *rpath, (size_t)(*rpath_limit - *rpath)); /* pass the buffer's current size, which exceeds PATH_MAX after the first growth; NOTE(review): presumably the last argument is the old size to preserve - confirm against lxc_mem_realloc */ + if (nret) { + ERROR("Failed to realloc memory for files limit variables"); + return -1; + } + *rpath = new_rpath; + *rpath_limit = *rpath + new_size; + + *dest = *rpath + dest_offset; + + return 0; +} + +static int do_get_symlinks_copy_buf(const char *buf, const char *prefix, size_t prefix_len, + char **rpath, char **dest) +{ + if (IS_ABSOLUTE_FILE_NAME(buf)) { + if (prefix_len) { + memcpy(*rpath, prefix, prefix_len); + } + *dest = *rpath + prefix_len; + *(*dest)++ = '/'; + } else { + if (*dest > *rpath + prefix_len + 1) { + for (--(*dest); *dest > *rpath && !ISSLASH((*dest)[-1]); --(*dest)) { + continue; + } + } + } + return 0; +} + +static int do_get_symlinks(const char **fullpath, const char *prefix, size_t prefix_len, + char **rpath, char **dest, const char **end, + int *num_links, char **extra_buf) +{ + char *buf = NULL; + size_t len; + ssize_t n; + int ret = -1; + + if (++(*num_links) > MAXSYMLINKS) { + ERROR("Too many links in '%s'", *fullpath); + goto out; + } + + buf = lxc_common_calloc_s(PATH_MAX); + if (buf == NULL) { + ERROR("Out of memory"); + goto out; + } + + n = readlink(*rpath, buf, PATH_MAX - 1); + if (n < 0) { + goto out; + } + buf[n] = '\0'; + + if (*extra_buf == NULL) { + *extra_buf = lxc_common_calloc_s(PATH_MAX); + if
/* True when c is the dot character used by the "." and ".." path segments. */
static inline bool is_current_char(const char c)
{
	return c == '.';
}

/* True when the segment [start, end) is exactly ".". */
static inline bool is_specify_current(const char *end, const char *start)
{
	if (end - start != 1) {
		return false;
	}
	return is_current_char(start[0]);
}

/* True when the segment [start, end) is exactly "..". */
static inline bool is_specify_parent(const char *end, const char *start)
{
	if (end - start != 2) {
		return false;
	}
	if (!is_current_char(start[0])) {
		return false;
	}
	return is_current_char(start[1]);
}
skip_dest_traling_slash(dest, rpath, prefix_len); + } else { + if (!ISSLASH((*dest)[-1])) { + *(*dest)++ = '/'; + } + + nret = do_path_realloc(start, end, rpath, dest, &rpath_limit); + if (nret != 0) { + nret = -1; + goto out; + } + + memcpy(*dest, start, (size_t)(end - start)); + *dest += end - start; + **dest = '\0'; + + if (do_eval_symlinks_in_scope_is_symlink(*rpath)) { + continue; + } + + nret = do_get_symlinks(&fullpath, prefix, prefix_len, rpath, dest, &end, &num_links, &extra_buf); + if (nret != 0) { + nret = -1; + goto out; + } + } + } +out: + free(extra_buf); + return nret; +} +static char *eval_symlinks_in_scope(const char *fullpath, const char *rootpath) +{ + char resroot[PATH_MAX] = {0}; + char *root = NULL; + char *rpath = NULL; + char *dest = NULL; + char *prefix = NULL; + const char *rpath_limit = NULL; + size_t prefix_len; + + if (fullpath == NULL || rootpath == NULL) { + return NULL; + } + + root = cleanpath(rootpath, resroot, sizeof(resroot)); + if (root == NULL) { + ERROR("Failed to get cleaned path"); + return NULL; + } + + if (!strcmp(fullpath, root)) { + return safe_strdup(fullpath); + } + + if (strstr(fullpath, root) == NULL) { + ERROR("Path '%s' is not in '%s'", fullpath, root); + return NULL; + } + + rpath = lxc_common_calloc_s(PATH_MAX); + if (rpath == NULL) { + ERROR("Out of memory"); + goto out; + } + rpath_limit = rpath + PATH_MAX; + + prefix = root; + prefix_len = (size_t)strlen(prefix); + if (!strcmp(prefix, "/")) { + prefix_len = 0; + } + + dest = rpath; + if (prefix_len) { + memcpy(rpath, prefix, prefix_len); + dest += prefix_len; + } + *dest++ = '/'; + + if (do_eval_symlinks_in_scope(fullpath, prefix, prefix_len, &rpath, &dest, + rpath_limit)) { + goto out; + } + + if (dest > rpath + prefix_len + 1 && ISSLASH(dest[-1])) { + --dest; + } + *dest = '\0'; + return rpath; + +out: + free(rpath); + return NULL; +} + +// FollowSymlinkInScope is a wrapper around evalSymlinksInScope that returns an +// absolute path. 
This function handles paths in a platform-agnostic manner. +char *follow_symlink_in_scope(const char *fullpath, const char *rootpath) +{ + char resfull[PATH_MAX] = {0}, *full = NULL; + char resroot[PATH_MAX] = {0}, *root = NULL; + + full = cleanpath(fullpath, resfull, PATH_MAX); + if (!full) { + ERROR("Failed to get cleaned path"); + return NULL; + } + + root = cleanpath(rootpath, resroot, PATH_MAX); + if (!root) { + ERROR("Failed to get cleaned path"); + return NULL; + } + + return eval_symlinks_in_scope(full, root); +} + +// GetResourcePath evaluates `path` in the scope of the container's rootpath, with proper path +// sanitisation. Symlinks are all scoped to the rootpath of the container, as +// though the container's rootpath was `/`. +// +// The BaseFS of a container is the host-facing path which is bind-mounted as +// `/` inside the container. This method is essentially used to access a +// particular path inside the container as though you were a process in that +// container. +int get_resource_path(const char *rootpath, const char *path, + char **scopepath) +{ + char resolved[PATH_MAX] = {0}, *cleanedpath = NULL; + char *fullpath = NULL; + size_t len; + + if (!rootpath || !path || !scopepath) + return -1; + + *scopepath = NULL; + + cleanedpath = cleanpath(path, resolved, PATH_MAX); + if (!cleanedpath) { + ERROR("Failed to get cleaned path"); + return -1; + } + + len = strlen(rootpath) + strlen(cleanedpath) + 1; + fullpath = malloc(len); + if (!fullpath) { + ERROR("Out of memory"); + return -1; + } + snprintf(fullpath, len, "%s%s", rootpath, cleanedpath); + + *scopepath = follow_symlink_in_scope(fullpath, rootpath); + + free(fullpath); + return 0; +} + +// Rel returns a relative path that is lexically equivalent to targpath when +// joined to basepath with an intervening separator. That is, +// Join(basepath, Rel(basepath, targpath)) is equivalent to targpath itself. 
+// On success, the returned path will always be relative to basepath, +// even if basepath and targpath share no elements. +// An error is returned if targpath can't be made relative to basepath or if +// knowing the current working directory would be necessary to compute it. +// Rel calls Clean on the result. +char *path_relative(const char *basepath, const char *targpath) +{ + char resbase[PATH_MAX] = {0}, *base = NULL; + char restarg[PATH_MAX] = {0}, *targ = NULL; + size_t bl = 0, tl = 0, b0 = 0, bi = 0, t0 = 0, ti = 0; + + base = cleanpath(basepath, resbase, PATH_MAX); + if (!base) { + ERROR("Failed to get cleaned path"); + return NULL; + } + + targ = cleanpath(targpath, restarg, PATH_MAX); + if (!targ) { + ERROR("Failed to get cleaned path"); + return NULL; + } + + if (strcmp(base, targ) == 0) + return safe_strdup("."); + + bl = strlen(base); + tl = strlen(targ); + while(true) { + while(bi < bl && !ISSLASH(base[bi])) + bi++; + while(ti < tl && !ISSLASH(targ[ti])) + ti++; + //not the same string + if (((bi - b0) != (ti - t0)) || strncmp(base + b0, targ + t0, bi - b0)) + break; + if (bi < bl) + bi++; + if (ti < tl) + ti++; + b0 = bi; + t0 = ti; + } + + if (b0 != bl) { + // Base elements left. Must go up before going down. + int seps = 0, i; + size_t ncopyed = 0, seps_size; + char *buf = NULL; + + for (bi = b0; bi < bl; bi++) { + if (ISSLASH(base[bi])) + seps++; + } + //strlen(..) + strlen(/..) 
+ '\0' + seps_size = 2 + seps * 3 + 1; + if (t0 != tl) + seps_size += 1 + tl - t0; + + buf = calloc(seps_size, 1); + if (!buf) { + ERROR("Out of memory"); + return NULL; + } + buf[ncopyed++] = '.'; + buf[ncopyed++] = '.'; + for (i = 0; i < seps; i++) { + buf[ncopyed++] = '/'; + buf[ncopyed++] = '.'; + buf[ncopyed++] = '.'; + } + if (t0 != tl) { + buf[ncopyed++] = '/'; + memcpy(buf + ncopyed, targ + t0, tl - t0 + 1); + } + return buf; + } + + return safe_strdup(targ + t0); +} diff --git a/src/lxc/path.h b/src/lxc/path.h new file mode 100644 index 000000000..2c60fb9be --- /dev/null +++ b/src/lxc/path.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2020. Allrights reserved + * Description: isulad utils + * Author: lifeng + * Create: 2020-04-11 +******************************************************************************/ +#ifndef __ISULAD_PATH_H_ +#define __ISULAD_PATH_H_ + +#include + +bool specify_current_dir(const char *path); + +bool has_traling_path_separator(const char *path); + +// PreserveTrailingDotOrSeparator returns the given cleaned path +// and appends a trailing `/.` or `/` if its corresponding original +// path ends with a trailing `/.` or `/`. If the cleaned +// path already ends in a `.` path segment, then another is not added. If the +// clean path already ends in a path separator, then another is not added. +char *preserve_trailing_dot_or_separator(const char *cleanedpath, + const char *originalpath); + + +// Split splits path immediately following the final Separator, +// separating it into a directory and file name component. +// If there is no Separator in path, Split returns an empty dir +// and file set to path. +// The returned values have the property that path = dir+file. 
+bool filepath_split(const char *path, char **dir, char **base); + +/* + * cleanpath is similar to glibc's realpath, but it does not expand symbolic links + * and does not check that the components of the path exist. + */ +char *cleanpath(const char *path, char *realpath, size_t realpath_len); + + +// FollowSymlinkInScope is a wrapper around evalSymlinksInScope that returns an +// absolute path. This function handles paths in a platform-agnostic manner. +char *follow_symlink_in_scope(const char *fullpath, const char *rootpath); + +// GetResourcePath evaluates `path` in the scope of the container's rootpath, with proper path +// sanitisation. Symlinks are all scoped to the rootpath of the container, as +// though the container's rootpath was `/`. +// +// The BaseFS of a container is the host-facing path which is bind-mounted as +// `/` inside the container. This method is essentially used to access a +// particular path inside the container as though you were a process in that +// container. +int get_resource_path(const char *rootpath, const char *path, + char **scopepath); + +// Rel returns a relative path that is lexically equivalent to targpath when +// joined to basepath with an intervening separator. That is, +// Join(basepath, Rel(basepath, targpath)) is equivalent to targpath itself. +// On success, the returned path will always be relative to basepath, +// even if basepath and targpath share no elements. +// An error is returned if targpath can't be made relative to basepath or if +// knowing the current working directory would be necessary to compute it. +// Rel calls Clean on the result. 
+char *path_relative(const char *basepath, const char *targpath); + +#endif diff --git a/src/lxc/process_utils.h b/src/lxc/process_utils.h deleted file mode 100644 index 4ea898a63..000000000 --- a/src/lxc/process_utils.h +++ /dev/null @@ -1,290 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1+ */ - -#ifndef __LXC_PROCESS_UTILS_H -#define __LXC_PROCESS_UTILS_H - -#ifndef _GNU_SOURCE -#define _GNU_SOURCE 1 -#endif -#include -#include -#include -#include -#include -#include -#include -#include - -#include "compiler.h" -#include "config.h" -#include "syscall_numbers.h" - -#ifndef CSIGNAL -#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */ -#endif - -#ifndef CLONE_VM -#define CLONE_VM 0x00000100 /* set if VM shared between processes */ -#endif - -#ifndef CLONE_FS -#define CLONE_FS 0x00000200 /* set if fs info shared between processes */ -#endif - -#ifndef CLONE_FILES -#define CLONE_FILES 0x00000400 /* set if open files shared between processes */ -#endif - -#ifndef CLONE_SIGHAND -#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */ -#endif - -#ifndef CLONE_PIDFD -#define CLONE_PIDFD 0x00001000 /* set if a pidfd should be placed in parent */ -#endif - -#ifndef CLONE_PTRACE -#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */ -#endif - -#ifndef CLONE_VFORK -#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */ -#endif - -#ifndef CLONE_PARENT -#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */ -#endif - -#ifndef CLONE_THREAD -#define CLONE_THREAD 0x00010000 /* Same thread group? 
*/ -#endif - -#ifndef CLONE_NEWNS -#define CLONE_NEWNS 0x00020000 /* New mount namespace group */ -#endif - -#ifndef CLONE_SYSVSEM -#define CLONE_SYSVSEM 0x00040000 /* share system V SEM_UNDO semantics */ -#endif - -#ifndef CLONE_SETTLS -#define CLONE_SETTLS 0x00080000 /* create a new TLS for the child */ -#endif - -#ifndef CLONE_PARENT_SETTID -#define CLONE_PARENT_SETTID 0x00100000 /* set the TID in the parent */ -#endif - -#ifndef CLONE_CHILD_CLEARTID -#define CLONE_CHILD_CLEARTID 0x00200000 /* clear the TID in the child */ -#endif - -#ifndef CLONE_DETACHED -#define CLONE_DETACHED 0x00400000 /* Unused, ignored */ -#endif - -#ifndef CLONE_UNTRACED -#define CLONE_UNTRACED 0x00800000 /* set if the tracing process can't force CLONE_PTRACE on this clone */ -#endif - -#ifndef CLONE_CHILD_SETTID -#define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */ -#endif - -#ifndef CLONE_NEWCGROUP -#define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */ -#endif - -#ifndef CLONE_NEWUTS -#define CLONE_NEWUTS 0x04000000 /* New utsname namespace */ -#endif - -#ifndef CLONE_NEWIPC -#define CLONE_NEWIPC 0x08000000 /* New ipc namespace */ -#endif - -#ifndef CLONE_NEWUSER -#define CLONE_NEWUSER 0x10000000 /* New user namespace */ -#endif - -#ifndef CLONE_NEWPID -#define CLONE_NEWPID 0x20000000 /* New pid namespace */ -#endif - -#ifndef CLONE_NEWNET -#define CLONE_NEWNET 0x40000000 /* New network namespace */ -#endif - -#ifndef CLONE_IO -#define CLONE_IO 0x80000000 /* Clone io context */ -#endif - -/* Flags for the clone3() syscall. */ -#ifndef CLONE_CLEAR_SIGHAND -#define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */ -#endif - -#ifndef CLONE_INTO_CGROUP -#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. 
*/ -#endif - -/* - * cloning flags intersect with CSIGNAL so can be used with unshare and clone3 - * syscalls only: - */ -#ifndef CLONE_NEWTIME -#define CLONE_NEWTIME 0x00000080 /* New time namespace */ -#endif - -/* waitid */ -#ifndef P_PIDFD -#define P_PIDFD 3 -#endif - -#ifndef CLONE_ARGS_SIZE_VER0 -#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */ -#endif - -#ifndef CLONE_ARGS_SIZE_VER1 -#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */ -#endif - -#ifndef CLONE_ARGS_SIZE_VER2 -#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */ -#endif - -#ifndef ptr_to_u64 -#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr))) -#endif -#ifndef u64_to_ptr -#define u64_to_ptr(x) ((void *)(uintptr_t)x) -#endif - -struct lxc_clone_args { - __aligned_u64 flags; - __aligned_u64 pidfd; - __aligned_u64 child_tid; - __aligned_u64 parent_tid; - __aligned_u64 exit_signal; - __aligned_u64 stack; - __aligned_u64 stack_size; - __aligned_u64 tls; - __aligned_u64 set_tid; - __aligned_u64 set_tid_size; - __aligned_u64 cgroup; -}; - -__returns_twice static inline pid_t lxc_clone3(struct lxc_clone_args *args, size_t size) -{ - return syscall(__NR_clone3, args, size); -} - -#if defined(__ia64__) -int __clone2(int (*__fn)(void *__arg), void *__child_stack_base, - size_t __child_stack_size, int __flags, void *__arg, ...); -#else -int clone(int (*fn)(void *), void *child_stack, int flags, void *arg, ... - /* pid_t *ptid, struct user_desc *tls, pid_t *ctid */); -#endif - -/** - * lxc_clone() - create a new process - * - * - allocate stack: - * This function allocates a new stack the size of page and passes it to the - * kernel. - * - * - support all CLONE_*flags: - * This function supports all CLONE_* flags. If in doubt or not sufficiently - * familiar with process creation in the kernel and interactions with libcs - * this function should be used. 
- * - * - pthread_atfork() handlers depending on libc: - * Whether this function runs pthread_atfork() handlers depends on the - * corresponding libc wrapper. glibc currently does not run pthread_atfork() - * handlers but does not guarantee that they are not. Other libcs might or - * might not run pthread_atfork() handlers. If you require guarantees please - * refer to the lxc_raw_clone*() functions in process_utils.{c,h}. - * - * - should call lxc_raw_getpid(): - * The child should use lxc_raw_getpid() to retrieve its pid. - */ -extern pid_t lxc_clone(int (*fn)(void *), void *arg, int flags, int *pidfd); - - -/* - * lxc_raw_clone() - create a new process - * - * - fork() behavior: - * This function returns 0 in the child and > 0 in the parent. - * - * - copy-on-write: - * This function does not allocate a new stack and relies on copy-on-write - * semantics. - * - * - supports subset of ClONE_* flags: - * lxc_raw_clone() intentionally only supports a subset of the flags available - * to the actual system call. Please refer to the implementation what flags - * cannot be used. Also, please don't assume that just because a flag isn't - * explicitly checked for as being unsupported that it is supported. If in - * doubt or not sufficiently familiar with process creation in the kernel and - * interactions with libcs this function should be used. - * - * - no pthread_atfork() handlers: - * This function circumvents - as much as this this is possible - any libc - * wrappers and thus does not run any pthread_atfork() handlers. Make sure - * that this is safe to do in the context you are trying to call this - * function. - * - * - must call lxc_raw_getpid(): - * The child must use lxc_raw_getpid() to retrieve its pid. - */ -extern pid_t lxc_raw_clone(unsigned long flags, int *pidfd); - -/* - * lxc_raw_clone_cb() - create a new process - * - * - non-fork() behavior: - * Function does return pid of the child or -1 on error. 
Pass in a callback - * function via the "fn" argument that gets executed in the child process. - * The "args" argument is passed to "fn". - * - * All other comments that apply to lxc_raw_clone() apply to lxc_raw_clone_cb() - * as well. - */ -extern pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, - unsigned long flags, int *pidfd); - -#ifndef HAVE_EXECVEAT -static inline int execveat(int dirfd, const char *pathname, char *const argv[], - char *const envp[], int flags) -{ - return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags); -} -#else -extern int execveat(int dirfd, const char *pathname, char *const argv[], - char *const envp[], int flags); -#endif - -/* - * Because of older glibc's pid cache (up to 2.25) whenever clone() is called - * the child must must retrieve it's own pid via lxc_raw_getpid(). - */ -static inline pid_t lxc_raw_getpid(void) -{ - return (pid_t)syscall(SYS_getpid); -} - -static inline pid_t lxc_raw_gettid(void) -{ -#if __NR_gettid > 0 - return syscall(__NR_gettid); -#else - return lxc_raw_getpid(); -#endif -} - -extern int lxc_raw_pidfd_send_signal(int pidfd, int sig, siginfo_t *info, - unsigned int flags); - -#endif /* __LXC_PROCESS_UTILS_H */ diff --git a/src/lxc/process_utils.c b/src/lxc/raw_syscalls.c similarity index 68% rename from src/lxc/process_utils.c rename to src/lxc/raw_syscalls.c index 7494def46..3c6bd2506 100644 --- a/src/lxc/process_utils.c +++ b/src/lxc/raw_syscalls.c @@ -13,12 +13,15 @@ #include "compiler.h" #include "config.h" -#include "log.h" #include "macro.h" -#include "process_utils.h" +#include "raw_syscalls.h" #include "syscall_numbers.h" -lxc_log_define(process_utils, lxc); +int lxc_raw_execveat(int dirfd, const char *pathname, char *const argv[], + char *const envp[], int flags) +{ + return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags); +} /* * This is based on raw_clone in systemd but adapted to our needs. 
This uses @@ -28,8 +31,16 @@ lxc_log_define(process_utils, lxc); * The nice thing about this is that we get fork() behavior. That is * lxc_raw_clone() returns 0 in the child and the child pid in the parent. */ -__returns_twice static pid_t __lxc_raw_clone(unsigned long flags, int *pidfd) +__returns_twice pid_t lxc_raw_clone(unsigned long flags, int *pidfd) { + /* + * These flags don't interest at all so we don't jump through any hoops + * of retrieving them and passing them to the kernel. + */ + errno = EINVAL; + if ((flags & (CLONE_VM | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | + CLONE_CHILD_CLEARTID | CLONE_SETTLS))) + return -EINVAL; #if defined(__s390x__) || defined(__s390__) || defined(__CRIS__) /* On s390/s390x and cris the order of the first and second arguments @@ -89,31 +100,6 @@ __returns_twice static pid_t __lxc_raw_clone(unsigned long flags, int *pidfd) #endif } -__returns_twice pid_t lxc_raw_clone(unsigned long flags, int *pidfd) -{ - pid_t pid; - struct lxc_clone_args args = { - .flags = flags, - .pidfd = ptr_to_u64(pidfd), - }; - - if (flags & (CLONE_VM | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | - CLONE_CHILD_CLEARTID | CLONE_SETTLS)) - return ret_errno(EINVAL); - - /* On CLONE_PARENT we inherit the parent's exit signal. */ - if (!(flags & CLONE_PARENT)) - args.exit_signal = SIGCHLD; - - pid = lxc_clone3(&args, CLONE_ARGS_SIZE_VER0); - if (pid < 0 && errno == ENOSYS) { - SYSTRACE("Falling back to legacy clone"); - return __lxc_raw_clone(flags, pidfd); - } - - return pid; -} - pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, unsigned long flags, int *pidfd) { @@ -138,30 +124,3 @@ int lxc_raw_pidfd_send_signal(int pidfd, int sig, siginfo_t *info, { return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); } - -/* - * Let's use the "standard stack limit" (i.e. glibc thread size default) for - * stack sizes: 8MB. 
- */ -#define __LXC_STACK_SIZE (8 * 1024 * 1024) -pid_t lxc_clone(int (*fn)(void *), void *arg, int flags, int *pidfd) -{ - pid_t ret; - void *stack; - - stack = malloc(__LXC_STACK_SIZE); - if (!stack) { - SYSERROR("Failed to allocate clone stack"); - return -ENOMEM; - } - -#ifdef __ia64__ - ret = __clone2(fn, stack, __LXC_STACK_SIZE, flags | SIGCHLD, arg, pidfd); -#else - ret = clone(fn, stack + __LXC_STACK_SIZE, flags | SIGCHLD, arg, pidfd); -#endif - if (ret < 0) - SYSERROR("Failed to clone (%#x)", flags); - - return ret; -} diff --git a/src/lxc/raw_syscalls.h b/src/lxc/raw_syscalls.h new file mode 100644 index 000000000..1219f28f4 --- /dev/null +++ b/src/lxc/raw_syscalls.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#ifndef __LXC_RAW_SYSCALL_H +#define __LXC_RAW_SYSCALL_H + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 +#endif +#include +#include +#include +#include +#include +#include +#include + +/* clone */ +#ifndef CLONE_PIDFD +#define CLONE_PIDFD 0x00001000 +#endif + +/* waitid */ +#ifndef P_PIDFD +#define P_PIDFD 3 +#endif + +/* + * lxc_raw_clone() - create a new process + * + * - fork() behavior: + * This function returns 0 in the child and > 0 in the parent. + * + * - copy-on-write: + * This function does not allocate a new stack and relies on copy-on-write + * semantics. + * + * - supports subset of ClONE_* flags: + * lxc_raw_clone() intentionally only supports a subset of the flags available + * to the actual system call. Please refer to the implementation what flags + * cannot be used. Also, please don't assume that just because a flag isn't + * explicitly checked for as being unsupported that it is supported. If in + * doubt or not sufficiently familiar with process creation in the kernel and + * interactions with libcs this function should be used. + * + * - no pthread_atfork() handlers: + * This function circumvents - as much as this this is possible - any libc + * wrappers and thus does not run any pthread_atfork() handlers. 
Make sure + * that this is safe to do in the context you are trying to call this + * function. + * + * - must call lxc_raw_getpid(): + * The child must use lxc_raw_getpid() to retrieve its pid. + */ +extern pid_t lxc_raw_clone(unsigned long flags, int *pidfd); + +/* + * lxc_raw_clone_cb() - create a new process + * + * - non-fork() behavior: + * Function does return pid of the child or -1 on error. Pass in a callback + * function via the "fn" argument that gets executed in the child process. + * The "args" argument is passed to "fn". + * + * All other comments that apply to lxc_raw_clone() apply to lxc_raw_clone_cb() + * as well. + */ +extern pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, + unsigned long flags, int *pidfd); + +extern int lxc_raw_execveat(int dirfd, const char *pathname, char *const argv[], + char *const envp[], int flags); + +/* + * Because of older glibc's pid cache (up to 2.25) whenever clone() is called + * the child must retrieve its own pid via lxc_raw_getpid(). 
+ */ +static inline pid_t lxc_raw_getpid(void) +{ + return (pid_t)syscall(SYS_getpid); +} + +static inline pid_t lxc_raw_gettid(void) +{ +#if __NR_gettid > 0 + return syscall(__NR_gettid); +#else + return lxc_raw_getpid(); +#endif +} + +extern int lxc_raw_pidfd_send_signal(int pidfd, int sig, siginfo_t *info, + unsigned int flags); + +#endif /* __LXC_RAW_SYSCALL_H */ diff --git a/src/lxc/rexec.c b/src/lxc/rexec.c index cf198c021..c9c84b8c1 100644 --- a/src/lxc/rexec.c +++ b/src/lxc/rexec.c @@ -13,7 +13,7 @@ #include "file_utils.h" #include "macro.h" #include "memory_utils.h" -#include "process_utils.h" +#include "raw_syscalls.h" #include "string_utils.h" #include "syscall_wrappers.h" @@ -88,7 +88,7 @@ static int is_memfd(void) static void lxc_rexec_as_memfd(char **argv, char **envp, const char *memfd_name) { __do_close int execfd = -EBADF, fd = -EBADF, memfd = -EBADF, - tmpfd = -EBADF; + tmpfd = -EBADF; int ret; ssize_t bytes_sent = 0; struct stat st = {0}; @@ -143,7 +143,7 @@ static void lxc_rexec_as_memfd(char **argv, char **envp, const char *memfd_name) if (fcntl(memfd, F_ADD_SEALS, LXC_MEMFD_REXEC_SEALS)) return; - execfd = move_fd(memfd); + execfd = memfd; } else { char procfd[LXC_PROC_PID_FD_LEN]; @@ -169,12 +169,13 @@ extern char **environ; int lxc_rexec(const char *memfd_name) { - __do_free_string_list char **argv = NULL; int ret; + char **argv = NULL; ret = is_memfd(); if (ret < 0 && ret == -ENOTRECOVERABLE) { - fprintf(stderr, "%s - Failed to determine whether this is a memfd\n", + fprintf(stderr, + "%s - Failed to determine whether this is a memfd\n", strerror(errno)); return -1; } else if (ret > 0) { @@ -183,7 +184,8 @@ int lxc_rexec(const char *memfd_name) ret = parse_argv(&argv); if (ret < 0) { - fprintf(stderr, "%s - Failed to parse command line parameters\n", + fprintf(stderr, + "%s - Failed to parse command line parameters\n", strerror(errno)); return -1; } diff --git a/src/lxc/seccomp.c b/src/lxc/seccomp.c index 7820db8b2..4b9d23c55 100644 --- 
a/src/lxc/seccomp.c +++ b/src/lxc/seccomp.c @@ -295,7 +295,11 @@ on_error: #endif #if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH +#ifdef HAVE_ISULAD +enum lxc_arch_t { +#else enum lxc_hostarch_t { +#endif lxc_seccomp_arch_all = 0, lxc_seccomp_arch_native, lxc_seccomp_arch_i386, @@ -351,8 +355,13 @@ int get_hostarch(void) return lxc_seccomp_arch_unknown; } +#ifdef HAVE_ISULAD +scmp_filter_ctx get_new_ctx(enum lxc_arch_t n_arch, + uint32_t default_policy_action, uint32_t *architectures) +#else scmp_filter_ctx get_new_ctx(enum lxc_hostarch_t n_arch, uint32_t default_policy_action, bool *needs_merge) +#endif { int ret; uint32_t arch; @@ -475,10 +484,17 @@ scmp_filter_ctx get_new_ctx(enum lxc_hostarch_t n_arch, return NULL; } TRACE("Removed native arch from main seccomp context"); - +#ifdef HAVE_ISULAD + *architectures = arch; +#else *needs_merge = true; +#endif } else { +#ifdef HAVE_ISULAD + *architectures = SCMP_ARCH_NATIVE; +#else *needs_merge = false; +#endif TRACE("Arch %d already present in main seccomp context", (int)n_arch); } @@ -510,7 +526,11 @@ bool do_resolve_add_rule(uint32_t arch, char *line, scmp_filter_ctx ctx, if (ret < 0) { errno = -ret; SYSERROR("Failed loading rule to reject force umount"); +#ifdef HAVE_ISULAD + return true; +#else return false; +#endif } INFO("Set seccomp rule to reject force umounts"); @@ -519,20 +539,34 @@ bool do_resolve_add_rule(uint32_t arch, char *line, scmp_filter_ctx ctx, nr = seccomp_syscall_resolve_name(line); if (nr == __NR_SCMP_ERROR) { +#ifdef HAVE_ISULAD + DEBUG("Failed to resolve syscall \"%s\"", line); + DEBUG("This syscall will NOT be handled by seccomp"); +#else WARN("Failed to resolve syscall \"%s\"", line); WARN("This syscall will NOT be handled by seccomp"); +#endif return true; } if (nr < 0) { +#ifdef HAVE_ISULAD + DEBUG("Got negative return value %d for syscall \"%s\"", nr, line); + DEBUG("This syscall will NOT be handled by seccomp"); +#else WARN("Got negative return value %d for syscall \"%s\"", nr, line); 
WARN("This syscall will NOT be handled by seccomp"); +#endif return true; } memset(&arg_cmp, 0, sizeof(arg_cmp)); for (i = 0; i < rule->args_num; i++) { +#ifdef HAVE_ISULAD + DEBUG("arg_cmp[%d]: SCMP_CMP(%u, %llu, %llu, %llu)", i, +#else INFO("arg_cmp[%d]: SCMP_CMP(%u, %llu, %llu, %llu)", i, +#endif rule->args_value[i].index, (long long unsigned int)rule->args_value[i].op, (long long unsigned int)rule->args_value[i].mask, @@ -553,14 +587,43 @@ bool do_resolve_add_rule(uint32_t arch, char *line, scmp_filter_ctx ctx, rule->args_num, arg_cmp); if (ret < 0) { errno = -ret; +#ifdef HAVE_ISULAD + DEBUG("Failed loading rule for %s (nr %d action %d (%s))", + line, nr, rule->action, get_action_name(rule->action)); + return true; +#else SYSERROR("Failed loading rule for %s (nr %d action %d (%s))", line, nr, rule->action, get_action_name(rule->action)); return false; +#endif } return true; } +#ifdef HAVE_ISULAD +#define SCMP_ARCH_INDEX_MAX 3 + +struct scmp_ctx_info { + uint32_t architectures[SCMP_ARCH_INDEX_MAX]; + enum lxc_arch_t lxc_arch[SCMP_ARCH_INDEX_MAX]; + scmp_filter_ctx contexts[SCMP_ARCH_INDEX_MAX]; + bool needs_merge[SCMP_ARCH_INDEX_MAX]; +}; + +static int get_arch_index(enum lxc_arch_t arch, struct scmp_ctx_info *ctx) +{ + int i; + + for (i = 0; i < SCMP_ARCH_INDEX_MAX; i++) { + if (ctx->lxc_arch[i] == arch) + return i; + } + + return -1; +} +#endif + /* * v2 consists of * [x86] @@ -575,6 +638,521 @@ bool do_resolve_add_rule(uint32_t arch, char *line, scmp_filter_ctx ctx, * write * close */ +#ifdef HAVE_ISULAD +static int parse_config_v2(FILE *f, char *line, size_t *line_bufsz, struct lxc_conf *conf) +{ + int ret; + char *p; + enum lxc_arch_t cur_rule_arch, native_arch; + bool blacklist = false; + uint32_t default_policy_action = -1, default_rule_action = -1; + struct seccomp_v2_rule rule; + struct scmp_ctx_info ctx; + + if (strncmp(line, "blacklist", 9) == 0) + blacklist = true; + else if (strncmp(line, "whitelist", 9) != 0) { + ERROR("Bad seccomp policy style 
\"%s\"", line); + return -1; + } + + p = strchr(line, ' '); + if (p) { + default_policy_action = get_v2_default_action(p + 1); + if (default_policy_action == -2) + return -1; + } + + /* for blacklist, allow any syscall which has no rule */ + if (blacklist) { + if (default_policy_action == -1) + default_policy_action = SCMP_ACT_ALLOW; + + if (default_rule_action == -1) + default_rule_action = SCMP_ACT_KILL; + } else { + if (default_policy_action == -1) + default_policy_action = SCMP_ACT_KILL; + + if (default_rule_action == -1) + default_rule_action = SCMP_ACT_ALLOW; + } + + memset(&ctx, 0, sizeof(ctx)); + ctx.architectures[0] = SCMP_ARCH_NATIVE; + ctx.architectures[1] = SCMP_ARCH_NATIVE; + ctx.architectures[2] = SCMP_ARCH_NATIVE; + native_arch = get_hostarch(); + cur_rule_arch = native_arch; + if (native_arch == lxc_seccomp_arch_amd64) { + cur_rule_arch = lxc_seccomp_arch_all; + + ctx.lxc_arch[0] = lxc_seccomp_arch_i386; + ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_i386, + default_policy_action, &ctx.architectures[0]); + if (!ctx.contexts[0]) + goto bad; + + ctx.lxc_arch[1] = lxc_seccomp_arch_x32; + ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_x32, + default_policy_action, &ctx.architectures[1]); + if (!ctx.contexts[1]) + goto bad; + + ctx.lxc_arch[2] = lxc_seccomp_arch_amd64; + ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_amd64, + default_policy_action, &ctx.architectures[2]); + if (!ctx.contexts[2]) + goto bad; +#ifdef SCMP_ARCH_PPC + } else if (native_arch == lxc_seccomp_arch_ppc64) { + cur_rule_arch = lxc_seccomp_arch_all; + + ctx.lxc_arch[0] = lxc_seccomp_arch_ppc; + ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_ppc, + default_policy_action, &ctx.architectures[0]); + if (!ctx.contexts[0]) + goto bad; + + ctx.lxc_arch[1] = lxc_seccomp_arch_ppc64; + ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_ppc64, + default_policy_action, &ctx.architectures[1]); + if (!ctx.contexts[1]) + goto bad; +#endif +#ifdef SCMP_ARCH_ARM + } else if (native_arch == 
lxc_seccomp_arch_arm64) { + cur_rule_arch = lxc_seccomp_arch_all; + + ctx.lxc_arch[0] = lxc_seccomp_arch_arm; + ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_arm, + default_policy_action, &ctx.architectures[0]); + if (!ctx.contexts[0]) + goto bad; + +#ifdef SCMP_ARCH_AARCH64 + ctx.lxc_arch[1] = lxc_seccomp_arch_arm64; + ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_arm64, + default_policy_action, &ctx.architectures[1]); + if (!ctx.contexts[1]) + goto bad; +#endif +#endif +#ifdef SCMP_ARCH_MIPS + } else if (native_arch == lxc_seccomp_arch_mips64) { + cur_rule_arch = lxc_seccomp_arch_all; + + ctx.lxc_arch[0] = lxc_seccomp_arch_mips; + ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_mips, + default_policy_action, &ctx.architectures[0]); + if (!ctx.contexts[0]) + goto bad; + + ctx.lxc_arch[1] = lxc_seccomp_arch_mips64n32; + ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_mips64n32, + default_policy_action, &ctx.architectures[1]); + if (!ctx.contexts[1]) + goto bad; + + ctx.lxc_arch[2] = lxc_seccomp_arch_mips64; + ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_mips64, + default_policy_action, &ctx.architectures[2]); + if (!ctx.contexts[2]) + goto bad; + } else if (native_arch == lxc_seccomp_arch_mipsel64) { + cur_rule_arch = lxc_seccomp_arch_all; + ctx.lxc_arch[0] = lxc_seccomp_arch_mipsel; + ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_mipsel, + default_policy_action, &ctx.architectures[0]); + if (!ctx.contexts[0]) + goto bad; + + ctx.lxc_arch[1] = lxc_seccomp_arch_mipsel64n32; + ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_mipsel64n32, + default_policy_action, &ctx.architectures[1]); + if (!ctx.contexts[1]) + goto bad; + + ctx.lxc_arch[2] = lxc_seccomp_arch_mipsel64; + ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_mipsel64, + default_policy_action, &ctx.architectures[2]); + if (!ctx.contexts[2]) + goto bad; +#endif + } + + if (default_policy_action != SCMP_ACT_KILL) { + ret = seccomp_reset(conf->seccomp.seccomp_ctx, default_policy_action); + if (ret != 0) { + 
ERROR("Error re-initializing Seccomp"); + return -1; + } + + ret = seccomp_attr_set(conf->seccomp.seccomp_ctx, SCMP_FLTATR_CTL_NNP, 0); + if (ret < 0) { + errno = -ret; + SYSERROR("Failed to turn off no-new-privs"); + return -1; + } + +#ifdef SCMP_FLTATR_ATL_TSKIP + ret = seccomp_attr_set(conf->seccomp.seccomp_ctx, SCMP_FLTATR_ATL_TSKIP, 1); + if (ret < 0) { + errno = -ret; + SYSWARN("Failed to turn on seccomp nop-skip, continuing"); + } +#endif + } + + while (getline(&line, line_bufsz, f) != -1) { + if (line[0] == '#') + continue; + + if (line[0] == '\0') + continue; + + remove_trailing_newlines(line); + +#ifdef HAVE_ISULAD + DEBUG("Processing \"%s\"", line); +#else + INFO("Processing \"%s\"", line); +#endif + if (line[0] == '[') { + /* Read the architecture for next set of rules. */ + if (strcmp(line, "[x86]") == 0 || + strcmp(line, "[X86]") == 0) { + if (native_arch != lxc_seccomp_arch_i386 && + native_arch != lxc_seccomp_arch_amd64) { + cur_rule_arch = lxc_seccomp_arch_unknown; + continue; + } + + cur_rule_arch = lxc_seccomp_arch_i386; + } else if (strcmp(line, "[x32]") == 0 || + strcmp(line, "[X32]") == 0) { + if (native_arch != lxc_seccomp_arch_amd64) { + cur_rule_arch = lxc_seccomp_arch_unknown; + continue; + } + + cur_rule_arch = lxc_seccomp_arch_x32; + } else if (strcmp(line, "[X86_64]") == 0 || + strcmp(line, "[x86_64]") == 0) { + if (native_arch != lxc_seccomp_arch_amd64) { + cur_rule_arch = lxc_seccomp_arch_unknown; + continue; + } + + cur_rule_arch = lxc_seccomp_arch_amd64; + } else if (strcmp(line, "[all]") == 0 || + strcmp(line, "[ALL]") == 0) { + cur_rule_arch = lxc_seccomp_arch_all; + } +#ifdef SCMP_ARCH_ARM + else if (strcmp(line, "[arm]") == 0 || + strcmp(line, "[ARM]") == 0) { + if (native_arch != lxc_seccomp_arch_arm && + native_arch != lxc_seccomp_arch_arm64) { + cur_rule_arch = lxc_seccomp_arch_unknown; + continue; + } + + cur_rule_arch = lxc_seccomp_arch_arm; + } +#endif +#ifdef SCMP_ARCH_AARCH64 + else if (strcmp(line, "[arm64]") == 0 || + 
strcmp(line, "[ARM64]") == 0) { + if (native_arch != lxc_seccomp_arch_arm64) { + cur_rule_arch = lxc_seccomp_arch_unknown; + continue; + } + + cur_rule_arch = lxc_seccomp_arch_arm64; + } +#endif +#ifdef SCMP_ARCH_PPC64LE + else if (strcmp(line, "[ppc64le]") == 0 || + strcmp(line, "[PPC64LE]") == 0) { + if (native_arch != lxc_seccomp_arch_ppc64le) { + cur_rule_arch = lxc_seccomp_arch_unknown; + continue; + } + + cur_rule_arch = lxc_seccomp_arch_ppc64le; + } +#endif +#ifdef SCMP_ARCH_PPC64 + else if (strcmp(line, "[ppc64]") == 0 || + strcmp(line, "[PPC64]") == 0) { + if (native_arch != lxc_seccomp_arch_ppc64) { + cur_rule_arch = lxc_seccomp_arch_unknown; + continue; + } + + cur_rule_arch = lxc_seccomp_arch_ppc64; + } +#endif +#ifdef SCMP_ARCH_PPC + else if (strcmp(line, "[ppc]") == 0 || + strcmp(line, "[PPC]") == 0) { + if (native_arch != lxc_seccomp_arch_ppc && + native_arch != lxc_seccomp_arch_ppc64) { + cur_rule_arch = lxc_seccomp_arch_unknown; + continue; + } + + cur_rule_arch = lxc_seccomp_arch_ppc; + } +#endif +#ifdef SCMP_ARCH_MIPS + else if (strcmp(line, "[mips64]") == 0 || + strcmp(line, "[MIPS64]") == 0) { + if (native_arch != lxc_seccomp_arch_mips64) { + cur_rule_arch = lxc_seccomp_arch_unknown; + continue; + } + + cur_rule_arch = lxc_seccomp_arch_mips64; + } else if (strcmp(line, "[mips64n32]") == 0 || + strcmp(line, "[MIPS64N32]") == 0) { + if (native_arch != lxc_seccomp_arch_mips64) { + cur_rule_arch = lxc_seccomp_arch_unknown; + continue; + } + + cur_rule_arch = lxc_seccomp_arch_mips64n32; + } else if (strcmp(line, "[mips]") == 0 || + strcmp(line, "[MIPS]") == 0) { + if (native_arch != lxc_seccomp_arch_mips && + native_arch != lxc_seccomp_arch_mips64) { + cur_rule_arch = lxc_seccomp_arch_unknown; + continue; + } + + cur_rule_arch = lxc_seccomp_arch_mips; + } else if (strcmp(line, "[mipsel64]") == 0 || + strcmp(line, "[MIPSEL64]") == 0) { + if (native_arch != lxc_seccomp_arch_mipsel64) { + cur_rule_arch = lxc_seccomp_arch_unknown; + continue; + } + + 
cur_rule_arch = lxc_seccomp_arch_mipsel64; + } else if (strcmp(line, "[mipsel64n32]") == 0 || + strcmp(line, "[MIPSEL64N32]") == 0) { + if (native_arch != lxc_seccomp_arch_mipsel64) { + cur_rule_arch = lxc_seccomp_arch_unknown; + continue; + } + + cur_rule_arch = lxc_seccomp_arch_mipsel64n32; + } else if (strcmp(line, "[mipsel]") == 0 || + strcmp(line, "[MIPSEL]") == 0) { + if (native_arch != lxc_seccomp_arch_mipsel && + native_arch != lxc_seccomp_arch_mipsel64) { + cur_rule_arch = lxc_seccomp_arch_unknown; + continue; + } + + cur_rule_arch = lxc_seccomp_arch_mipsel; + } +#endif +#ifdef SCMP_ARCH_S390X + else if (strcmp(line, "[s390x]") == 0 || + strcmp(line, "[S390X]") == 0) { + if (native_arch != lxc_seccomp_arch_s390x) { + cur_rule_arch = lxc_seccomp_arch_unknown; + continue; + } + + cur_rule_arch = lxc_seccomp_arch_s390x; + } +#endif + else { + goto bad_arch; + } + + continue; + } + + /* irrelevant arch - i.e. arm on i386 */ + if (cur_rule_arch == lxc_seccomp_arch_unknown) + continue; + + memset(&rule, 0, sizeof(rule)); + /* read optional action which follows the syscall */ + ret = parse_v2_rules(line, default_rule_action, &rule); + if (ret != 0) { + ERROR("Failed to interpret seccomp rule"); + goto bad_rule; + } + + if (cur_rule_arch == native_arch) { + /* add for native arch */ + if (!do_resolve_add_rule(SCMP_ARCH_NATIVE, line, + conf->seccomp.seccomp_ctx, &rule)) + goto bad_rule; + +#ifdef HAVE_ISULAD + DEBUG("Added native rule for arch %d for %s action %d(%s)", +#else + INFO("Added native rule for arch %d for %s action %d(%s)", +#endif + SCMP_ARCH_NATIVE, line, rule.action, + get_action_name(rule.action)); + } else if (cur_rule_arch != lxc_seccomp_arch_all) { + /* add for compat specified arch */ + int arch_index = get_arch_index(cur_rule_arch, &ctx); + if (arch_index < 0) + goto bad_arch; + + if (!do_resolve_add_rule(ctx.architectures[arch_index], line, + ctx.contexts[arch_index], &rule)) + goto bad_rule; + +#ifdef HAVE_ISULAD + DEBUG("Added compat rule 
for arch %d for %s action %d(%s)", +#else + INFO("Added compat rule for arch %d for %s action %d(%s)", +#endif + ctx.architectures[arch_index], line, rule.action, + get_action_name(rule.action)); + ctx.needs_merge[arch_index] = true; + } else { + /* add for all compat archs */ + if (!do_resolve_add_rule(SCMP_ARCH_NATIVE, line, + conf->seccomp.seccomp_ctx, &rule)) + goto bad_rule; + +#ifdef HAVE_ISULAD + DEBUG("Added native rule for arch %d for %s action %d(%s)", +#else + INFO("Added native rule for arch %d for %s action %d(%s)", +#endif + SCMP_ARCH_NATIVE, line, rule.action, + get_action_name(rule.action)); + + if (ctx.architectures[0] != SCMP_ARCH_NATIVE) { + if (!do_resolve_add_rule(ctx.architectures[0], line, + ctx.contexts[0], &rule)) + goto bad_rule; + +#ifdef HAVE_ISULAD + DEBUG("Added compat rule for arch %d for %s action %d(%s)", +#else + INFO("Added compat rule for arch %d for %s action %d(%s)", +#endif + ctx.architectures[0], line, rule.action, + get_action_name(rule.action)); + ctx.needs_merge[0] = true; + } + + if (ctx.architectures[1] != SCMP_ARCH_NATIVE) { + if (!do_resolve_add_rule(ctx.architectures[1], line, + ctx.contexts[1], &rule)) + goto bad_rule; + +#ifdef HAVE_ISULAD + DEBUG("Added compat rule for arch %d for %s action %d(%s)", +#else + INFO("Added compat rule for arch %d for %s action %d(%s)", +#endif + ctx.architectures[1], line, rule.action, + get_action_name(rule.action)); + ctx.needs_merge[1] = true; + } + + if (ctx.architectures[2] != SCMP_ARCH_NATIVE) { + if (!do_resolve_add_rule(ctx.architectures[2], line, + ctx.contexts[2], &rule)) + goto bad_rule; + +#ifdef HAVE_ISULAD + DEBUG("Added native rule for arch %d for %s action %d(%s)", +#else + INFO("Added native rule for arch %d for %s action %d(%s)", +#endif + ctx.architectures[2], line, rule.action, + get_action_name(rule.action)); + ctx.needs_merge[2] = true; + } + } + + } + + INFO("Merging compat seccomp contexts into main context"); + if (ctx.contexts[0]) { + if (ctx.needs_merge[0]) 
{ + ret = seccomp_merge(conf->seccomp.seccomp_ctx, ctx.contexts[0]); + if (ret < 0) { + ERROR("%s - Failed to merge first compat seccomp " + "context into main context", strerror(-ret)); + goto bad; + } + + TRACE("Merged first compat seccomp context into main context"); + } else { + seccomp_release(ctx.contexts[0]); + ctx.contexts[0] = NULL; + } + } + + if (ctx.contexts[1]) { + if (ctx.needs_merge[1]) { + ret = seccomp_merge(conf->seccomp.seccomp_ctx, ctx.contexts[1]); + if (ret < 0) { + ERROR("%s - Failed to merge second compat seccomp " + "context into main context", strerror(-ret)); + goto bad; + } + + TRACE("Merged second compat seccomp context into main context"); + } else { + seccomp_release(ctx.contexts[1]); + ctx.contexts[1] = NULL; + } + } + + if (ctx.contexts[2]) { + if (ctx.needs_merge[2]) { + ret = seccomp_merge(conf->seccomp.seccomp_ctx, ctx.contexts[2]); + if (ret < 0) { + ERROR("%s - Failed to merge third compat seccomp " + "context into main context", strerror(-ret)); + goto bad; + } + + TRACE("Merged third compat seccomp context into main context"); + } else { + seccomp_release(ctx.contexts[2]); + ctx.contexts[2] = NULL; + } + } + + free(line); + return 0; + +bad_arch: + ERROR("Unsupported architecture \"%s\"", line); + +bad_rule: +bad: + if (ctx.contexts[0]) + seccomp_release(ctx.contexts[0]); + + if (ctx.contexts[1]) + seccomp_release(ctx.contexts[1]); + + if (ctx.contexts[2]) + seccomp_release(ctx.contexts[2]); + + free(line); + + return -1; +} +#else static int parse_config_v2(FILE *f, char *line, size_t *line_bufsz, struct lxc_conf *conf) { int ret; @@ -1067,6 +1645,7 @@ bad: return -1; } +#endif #else /* HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH */ static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf) { @@ -1354,7 +1933,6 @@ int seccomp_notify_handler(int fd, uint32_t events, void *data, char *cookie = conf->seccomp.notifier.cookie; uint64_t req_id; - memset(req, 0, sizeof(*req)); ret = seccomp_notify_receive(fd, req); if 
(ret) { SYSERROR("Failed to read seccomp notification"); diff --git a/src/lxc/start.c b/src/lxc/start.c index fd969c433..51d13254b 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c @@ -47,7 +47,7 @@ #include "monitor.h" #include "namespace.h" #include "network.h" -#include "process_utils.h" +#include "raw_syscalls.h" #include "start.h" #include "storage/storage.h" #include "storage/storage_utils.h" @@ -212,13 +212,6 @@ int lxc_check_inherited(struct lxc_conf *conf, bool closeall, if (conf && conf->close_all_fds) closeall = true; - /* - * Disable syslog at this point to avoid the above logging - * function to open a new fd and make the check_inherited function - * enter an infinite loop. - */ - lxc_log_syslog_disable(); - restart: dir = opendir("/proc/self/fd"); if (!dir) @@ -279,24 +272,21 @@ restart: #endif if (closeall) { - if (close(fd)) - SYSINFO("Closed inherited fd %d", fd); - else - INFO("Closed inherited fd %d", fd); + close(fd); closedir(dir); + INFO("Closed inherited fd %d", fd); goto restart; } WARN("Inherited fd %d", fd); } - closedir(dir); - /* - * Only enable syslog at this point to avoid the above logging - * function to open a new fd and make the check_inherited function - * enter an infinite loop. + /* Only enable syslog at this point to avoid the above logging function + * to open a new fd and make the check_inherited function enter an + * infinite loop. 
*/ - lxc_log_syslog_enable(); + lxc_log_enable_syslog(); + closedir(dir); /* cannot fail */ return 0; } @@ -590,13 +580,23 @@ int lxc_poll(const char *name, struct lxc_handler *handler) TRACE("Mainloop is ready"); +#ifdef HAVE_ISULAD + // iSulad: close stdin pipe if we do not want open_stdin with container stdin + if (!handler->conf->console.open_stdin) { + if (handler->conf->console.pipes[0][1] > 0) { + close(handler->conf->console.pipes[0][1]); + handler->conf->console.pipes[0][1] = -1; + } + } +#endif + ret = lxc_mainloop(&descr, -1); close_prot_errno_disarm(descr.epfd); if (ret < 0 || !handler->init_died) goto out_mainloop_console; if (has_console) - ret = lxc_mainloop(&descr_console, 0); + ret = lxc_mainloop(&descr_console, 100); out_mainloop_console: if (has_console) { @@ -615,7 +615,32 @@ out_sigfd: return ret; } -void lxc_put_handler(struct lxc_handler *handler) +void lxc_zero_handler(struct lxc_handler *handler) +{ + memset(handler, 0, sizeof(struct lxc_handler)); + + handler->state = STOPPED; + + handler->pinfd = -EBADF; + + handler->pidfd = -EBADF; + + handler->sigfd = -EBADF; + + for (int i = 0; i < LXC_NS_MAX; i++) + handler->nsfd[i] = -EBADF; + + handler->data_sock[0] = -EBADF; + handler->data_sock[1] = -EBADF; + + handler->state_socket_pair[0] = -EBADF; + handler->state_socket_pair[1] = -EBADF; + + handler->sync_sock[0] = -EBADF; + handler->sync_sock[1] = -EBADF; +} + +void lxc_free_handler(struct lxc_handler *handler) { close_prot_errno_disarm(handler->pinfd); close_prot_errno_disarm(handler->pidfd); @@ -627,27 +652,22 @@ void lxc_put_handler(struct lxc_handler *handler) close_prot_errno_disarm(handler->state_socket_pair[0]); close_prot_errno_disarm(handler->state_socket_pair[1]); cgroup_exit(handler->cgroup_ops); - if (handler->conf && handler->conf->reboot == REBOOT_NONE) - free_disarm(handler); - else - handler->conf = NULL; + handler->conf = NULL; + free_disarm(handler); } -struct lxc_handler *lxc_init_handler(struct lxc_handler *old, - const 
char *name, struct lxc_conf *conf, +struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf, const char *lxcpath, bool daemonize) { - int nr_keep_fds = 0; int ret; struct lxc_handler *handler; - if (!old) - handler = zalloc(sizeof(*handler)); - else - handler = old; + handler = malloc(sizeof(*handler)); if (!handler) return NULL; + memset(handler, 0, sizeof(*handler)); + /* Note that am_guest_unpriv() checks the effective uid. We * probably don't care if we are real root only if we are running * as root so this should be fine. @@ -671,6 +691,11 @@ struct lxc_handler *lxc_init_handler(struct lxc_handler *old, handler->nsfd[i] = -EBADF; handler->name = name; + +#ifdef HAVE_ISULAD + handler->exit_code = -1; /* isulad: record exit code of container */ +#endif + if (daemonize) handler->transient_pid = lxc_raw_getpid(); else @@ -691,8 +716,6 @@ struct lxc_handler *lxc_init_handler(struct lxc_handler *old, TRACE("Created anonymous pair {%d,%d} of unix sockets", handler->state_socket_pair[0], handler->state_socket_pair[1]); - handler->keep_fds[nr_keep_fds++] = handler->state_socket_pair[0]; - handler->keep_fds[nr_keep_fds++] = handler->state_socket_pair[1]; } if (handler->conf->reboot == REBOOT_NONE) { @@ -701,7 +724,6 @@ struct lxc_handler *lxc_init_handler(struct lxc_handler *old, ERROR("Failed to set up command socket"); goto on_error; } - handler->keep_fds[nr_keep_fds++] = handler->conf->maincmd_fd; } TRACE("Unix domain socket %d for command server is ready", @@ -710,7 +732,7 @@ struct lxc_handler *lxc_init_handler(struct lxc_handler *old, return handler; on_error: - lxc_put_handler(handler); + lxc_free_handler(handler); return NULL; } @@ -721,6 +743,10 @@ int lxc_init(const char *name, struct lxc_handler *handler) int ret; const char *loglevel; struct lxc_conf *conf = handler->conf; +#ifdef HAVE_ISULAD + conf->console.disable_pty = handler->disable_pty; + conf->console.open_stdin = handler->open_stdin; +#endif handler->monitor_pid = 
lxc_raw_getpid(); status_fd = open("/proc/self/status", O_RDONLY | O_CLOEXEC); @@ -810,6 +836,9 @@ int lxc_init(const char *name, struct lxc_handler *handler) ret = lxc_terminal_setup(conf); if (ret < 0) { ERROR("Failed to create console"); +#ifdef HAVE_ISULAD + lxc_write_error_message(conf->errpipe[1], "Failed to create console for container \"%s\".", name); +#endif goto out_restore_sigmask; } TRACE("Created console"); @@ -853,6 +882,185 @@ out_restore_sigmask: return -1; } +#ifdef HAVE_ISULAD +/* isulad: start timeout thread */ +typedef enum { + START_INIT, + START_TIMEOUT, + START_MAX, +} start_timeout_t; + +static start_timeout_t global_timeout_state = START_INIT; +static sem_t global_timeout_sem; + +struct start_timeout_conf { + unsigned int timeout; + int errfd; +}; + +void trim_line(char *s) +{ + size_t len; + + len = strlen(s); + while ((len > 1) && (s[len - 1] == '\n')) + s[--len] = '\0'; +} + +static int _read_procs_file(const char *path, pid_t **pids, size_t *len) +{ + FILE *f; + char *line = NULL; + size_t sz = 0; + pid_t *tmp_pids = NULL; + + f = fopen_cloexec(path, "r"); + if (!f) + return -1; + + while (getline(&line, &sz, f) != -1) { + pid_t pid; + trim_line(line); + pid = (pid_t)atoll(line); + if (lxc_mem_realloc((void **)&tmp_pids, sizeof(pid_t) * (*len + 1), *pids, sizeof(pid_t) * (*len)) != 0) { + free_disarm(*pids); /* drop everything collected so far */ + *len = 0; /* keep the count in sync with the freed array so callers never index NULL */ + ERROR("out of memory"); + free(line); + fclose(f); + return -1; + } + *pids = tmp_pids; + + (*pids)[*len] = pid; + (*len)++; + } + + free(line); + fclose(f); + return 0; +} + +static int _recursive_read_cgroup_procs(const char *dirpath, pid_t **pids, size_t *len) +{ + struct dirent *direntp = NULL; + DIR *dir = NULL; + int ret, failed = 0; + char pathname[PATH_MAX]; + + dir = opendir(dirpath); + if (dir == NULL) { + WARN("Failed to open \"%s\"", dirpath); + return 0; + } + + while ((direntp = readdir(dir))) { + struct stat mystat; + int rc; + + if (!strcmp(direntp->d_name, ".") || + !strcmp(direntp->d_name, "..")) +
continue; + + rc = snprintf(pathname, PATH_MAX, "%s/%s", dirpath, direntp->d_name); + if (rc < 0 || rc >= PATH_MAX) { + failed = 1; + continue; + } + + if (strcmp(direntp->d_name, "cgroup.procs") == 0) { + if (_read_procs_file(pathname, pids, len)) { + failed = 1; + + } + continue; + } + + ret = lstat(pathname, &mystat); + if (ret) { + failed = 1; + continue; + } + + if (S_ISDIR(mystat.st_mode)) { + if (_recursive_read_cgroup_procs(pathname, pids, len) < 0) + failed = 1; + } + } + + ret = closedir(dir); + if (ret) { + WARN("Failed to close directory \"%s\"", dirpath); + failed = 1; + } + + return failed ? -1 : 0; +} + +int get_all_pids(struct cgroup_ops *cg_ops, pid_t **pids, size_t *len) +{ + const char *devices_path = NULL; + + devices_path = cg_ops->get_cgroup_full_path(cg_ops, "devices"); + if (!file_exists(devices_path)) { + return 0; + } + + return _recursive_read_cgroup_procs(devices_path, pids, len); +} + +static int set_cgroup_freezer(struct cgroup_ops *cg_ops, const char *value) +{ + char *fullpath; + int ret; + + fullpath = must_make_path(cg_ops->get_cgroup_full_path(cg_ops, "freezer"), "freezer.state", NULL); + ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666); + free(fullpath); + return ret; +} + +/* isulad: kill all process in container cgroup path */ +static void signal_all_processes(struct lxc_handler *handler) +{ + int ret; + struct cgroup_ops *cg_ops = handler->cgroup_ops; + pid_t *pids = NULL; + size_t len = 0, i; + + ret = set_cgroup_freezer(cg_ops, "FROZEN"); + if (ret < 0 && errno != ENOENT) { + WARN("cgroup_set frozen failed"); + } + + ret = get_all_pids(cg_ops, &pids, &len); + if (ret < 0) { + WARN("failed to get all pids"); + } + + for (i = 0; i < len; i++) { + ret = kill(pids[i], SIGKILL); + if (ret < 0 && errno != ESRCH) { + WARN("Can not kill process (pid=%d) with SIGKILL for container %s", pids[i], handler->name); + } + } + + ret = set_cgroup_freezer(cg_ops, "THAWED"); + if (ret < 0 && errno != ENOENT) { + 
WARN("cgroup_set thawed failed"); + } + + for (i = 0; i < len; i++) { + ret = lxc_wait_for_pid_status(pids[i]); + if (ret < 0 && errno != ECHILD) { + WARN("Failed to wait pid %d for container %s: %s", pids[i], handler->name, strerror(errno)); + } + } + + free(pids); +} +#endif + void lxc_end(struct lxc_handler *handler) { int ret; @@ -926,11 +1134,37 @@ void lxc_end(struct lxc_handler *handler) lsm_process_cleanup(handler->conf, handler->lxcpath); +#ifdef HAVE_ISULAD + // close maincmd fd before destroy cgroup for isulad + if (handler->conf->reboot == REBOOT_NONE) { + /* For all new state clients simply close the command socket. + * This will inform all state clients that the container is + * STOPPED and also prevents a race between a open()/close() on + * the command socket causing a new process to get ECONNREFUSED + * because we haven't yet closed the command socket. + */ + close_prot_errno_disarm(handler->conf->maincmd_fd); + TRACE("Closed command socket"); + } + int retry_count = 0; + int max_retry = 10; +retry: + if (cgroup_ops != NULL && !cgroup_ops->payload_destroy(cgroup_ops, handler)) { + TRACE("Trying to kill all subprocess"); + signal_all_processes(handler); + TRACE("Finished kill all subprocess"); + if (retry_count < max_retry) { + usleep(100 * 1000); /* 100 millisecond */ + retry_count++; + goto retry; + } + SYSERROR("Failed to destroy cgroup path for container: \"%s\"", handler->name); + } +#else if (cgroup_ops) { cgroup_ops->payload_destroy(cgroup_ops, handler); cgroup_ops->monitor_destroy(cgroup_ops, handler); } - if (handler->conf->reboot == REBOOT_NONE) { /* For all new state clients simply close the command socket. 
* This will inform all state clients that the container is @@ -940,12 +1174,25 @@ void lxc_end(struct lxc_handler *handler) */ close_prot_errno_disarm(handler->conf->maincmd_fd); TRACE("Closed command socket"); + } +#endif + if (handler->conf->reboot == REBOOT_NONE) { /* This function will try to connect to the legacy lxc-monitord * state server and only exists for backwards compatibility. */ lxc_monitor_send_state(name, STOPPED, handler->lxcpath); +#ifdef HAVE_ISULAD + /* isuald: write exit code to exit fifo */ + if (handler->conf->exit_fd >= 0) { + ret = write(handler->conf->exit_fd, &handler->exit_code, sizeof(int)); + if (ret != sizeof(int)) { + SYSERROR("Failed to write to exit code to exit fifo."); + } + } +#endif + /* The command socket is closed so no one can acces the command * socket anymore so there's no need to lock it. */ @@ -1001,7 +1248,7 @@ void lxc_end(struct lxc_handler *handler) if (handler->conf->ephemeral == 1 && handler->conf->reboot != REBOOT_REQ) lxc_destroy_container_on_signal(handler, name); - lxc_put_handler(handler); + lxc_free_handler(handler); } void lxc_abort(struct lxc_handler *handler) @@ -1032,16 +1279,36 @@ static int do_start(void *data) struct lxc_handler *handler = data; __lxc_unused __do_close int data_sock0 = handler->data_sock[0], data_sock1 = handler->data_sock[1]; - __do_close int devnull_fd = -EBADF, status_fd = -EBADF; + __do_close int status_fd = -EBADF; int ret; uid_t new_uid; gid_t new_gid; struct lxc_list *iterator; uid_t nsuid = 0; gid_t nsgid = 0; + int devnull_fd = -1; lxc_sync_fini_parent(handler); +#ifdef HAVE_ISULAD + sigset_t mask; + + /*isulad: restore default signal handlers and unblock all signals*/ + for (int i = 1; i < NSIG; i++) + signal(i, SIG_DFL); + + ret = sigfillset(&mask); + if (ret < 0) { + SYSERROR("Failed to fill signal mask"); + goto out_warn_father; + } + ret = sigprocmask(SIG_UNBLOCK, &mask, NULL); + if (ret < 0) { + SYSERROR("Failed to set signal mask"); + goto out_warn_father; + } +#endif + 
if (lxc_abstract_unix_recv_fds(data_sock1, &status_fd, 1, NULL, 0) < 0) { ERROR("Failed to receive status file descriptor to child process"); goto out_warn_father; @@ -1155,7 +1422,11 @@ static int do_start(void *data) * means that migration won't work, but at least we won't spew output * where it isn't wanted. */ +#ifdef HAVE_ISULAD + if (!handler->disable_pty && handler->daemonize && !handler->conf->autodev) { +#else if (handler->daemonize && !handler->conf->autodev) { +#endif char path[PATH_MAX]; ret = snprintf(path, sizeof(path), "%s/dev/null", @@ -1221,6 +1492,9 @@ static int do_start(void *data) /* Setup the container, ip, names, utsname, ... */ ret = lxc_setup(handler); if (ret < 0) { +#ifdef HAVE_ISULAD + lxc_write_error_message(handler->conf->errpipe[1], "Failed to setup lxc, please check the config file."); +#endif ERROR("Failed to setup container \"%s\"", handler->name); goto out_warn_father; } @@ -1243,23 +1517,82 @@ static int do_start(void *data) DEBUG("Set PR_SET_NO_NEW_PRIVS to block execve() gainable privileges"); } +#ifdef HAVE_ISULAD + /* isulad: dup2 pipe[0][0] to container stdin, pipe[1][1] to container stdout, pipe[2][1] to container stderr */ + if (handler->disable_pty) { + if (handler->conf->console.pipes[0][1] >= 0) { + close(handler->conf->console.pipes[0][1]); + handler->conf->console.pipes[0][1] = -1; + } + + if (handler->conf->console.pipes[0][0] >= 0) { + ret = dup2(handler->conf->console.pipes[0][0], STDIN_FILENO); + if (ret < 0) + goto out_warn_father; + } + + if (handler->conf->console.pipes[1][0] >= 0) { + close(handler->conf->console.pipes[1][0]); + handler->conf->console.pipes[1][0] = -1; + } + + if (handler->conf->console.pipes[1][1] >= 0) { + ret = dup2(handler->conf->console.pipes[1][1], STDOUT_FILENO); + if (ret < 0) + goto out_warn_father; + } + if (handler->conf->console.pipes[2][0] >= 0) { + close(handler->conf->console.pipes[2][0]); + handler->conf->console.pipes[2][0] = -1; + } + + if (handler->conf->console.pipes[2][1] 
>= 0) { + ret = dup2(handler->conf->console.pipes[2][1], STDERR_FILENO); + if (ret < 0) + goto out_warn_father; + } + } +#endif + /* Some init's such as busybox will set sane tty settings on stdin, * stdout, stderr which it thinks is the console. We already set them * the way we wanted on the real terminal, and we want init to do its * setup on its console ie. the pty allocated in lxc_terminal_setup() so * make sure that that pty is stdin,stdout,stderr. */ - if (handler->conf->console.pts >= 0) { + setsid(); +#ifdef HAVE_ISULAD + if (!handler->disable_pty && handler->conf->console.slave >= 0) { + /* isulad:make the given terminal as controlling terminal to avoid warning + * sh: cannot set terminal process group (-1): Inappropriate ioctl for device + * sh: no job control in this shell */ + if (ioctl(handler->conf->console.slave, TIOCSCTTY, NULL) < 0) { + ERROR("Faild to make the given terminal the controlling terminal of the calling process"); + goto out_warn_father; + } + if (handler->daemonize || !handler->conf->is_execute) + ret = set_stdfds(handler->conf->console.slave); + else + ret = lxc_terminal_set_stdfds(handler->conf->console.slave); + if (ret < 0) { + ERROR("Failed to redirect std{in,out,err} to pty file " + "descriptor %d", handler->conf->console.slave); + goto out_warn_father; + } + } +#else + if (handler->conf->console.slave >= 0) { if (handler->daemonize || !handler->conf->is_execute) - ret = set_stdfds(handler->conf->console.pts); + ret = set_stdfds(handler->conf->console.slave); else - ret = lxc_terminal_set_stdfds(handler->conf->console.pts); + ret = lxc_terminal_set_stdfds(handler->conf->console.slave); if (ret < 0) { ERROR("Failed to redirect std{in,out,err} to pty file descriptor %d", - handler->conf->console.pts); + handler->conf->console.slave); goto out_warn_father; } } +#endif /* If we mounted a temporary proc, then unmount it now. 
*/ tmp_proc_unmount(handler->conf); @@ -1283,7 +1616,8 @@ static int do_start(void *data) close_prot_errno_disarm(handler->sigfd); - if (handler->conf->console.pts < 0 && handler->daemonize) { + #ifdef HAVE_ISULAD + if (!handler->disable_pty && handler->conf->console.slave < 0 && handler->daemonize) { if (devnull_fd < 0) { devnull_fd = open_devnull(); if (devnull_fd < 0) @@ -1296,12 +1630,35 @@ static int do_start(void *data) goto out_warn_father; } } + #else + if (handler->conf->console.slave < 0 && handler->daemonize) { + if (devnull_fd < 0) { + devnull_fd = open_devnull(); + if (devnull_fd < 0) + goto out_warn_father; + } - close_prot_errno_disarm(devnull_fd); + ret = set_stdfds(devnull_fd); + if (ret < 0) { + ERROR("Failed to redirect std{in,out,err} to \"/dev/null\""); + goto out_warn_father; + } + } + #endif - setsid(); + close_prot_errno_disarm(devnull_fd); if (handler->conf->init_cwd) { +#ifdef HAVE_ISULAD + /* try to craete workdir if not exist */ + struct stat st; + if (stat(handler->conf->init_cwd, &st) < 0 && mkdir_p(handler->conf->init_cwd, 0755) < 0) { + SYSERROR("Try to create directory \"%s\" as workdir failed", handler->conf->init_cwd); + lxc_write_error_message(handler->conf->errpipe[1], "%s:%d: Failed to create workdir: %s.", + __FILE__, __LINE__, strerror(errno)); + goto out_warn_father; + } +#endif ret = chdir(handler->conf->init_cwd); if (ret < 0) { SYSERROR("Could not change directory to \"%s\"", @@ -1345,6 +1702,13 @@ static int do_start(void *data) } } +#ifdef HAVE_ISULAD + if (prctl(PR_SET_KEEPCAPS, 1) < 0) { + SYSERROR("Failed to keep permitted capabilities"); + goto out_warn_father; + } +#endif + /* The container has been setup. We can now switch to an unprivileged * uid/gid. 
*/ @@ -1358,6 +1722,13 @@ static int do_start(void *data) if (new_gid == nsgid) new_gid = LXC_INVALID_GID; +#ifdef HAVE_ISULAD + // isulad: set env home in container + if (lxc_setup_env_home(new_uid) < 0) { + goto out_warn_father; + } +#endif + /* Make sure that the processes STDIO is correctly owned by the user that we are switching to */ ret = fix_stdio_permissions(new_uid); if (ret) @@ -1371,8 +1742,16 @@ static int do_start(void *data) #if HAVE_LIBCAP if (lxc_proc_cap_is_set(CAP_SETGID, CAP_EFFECTIVE)) #endif + #ifdef HAVE_ISULAD + /* isulad: set groups for init process, and before we set uid and gid */ + if (!lxc_setgroups(handler->conf->init_groups_len, handler->conf->init_groups)) { + ERROR("Can not set groups"); + goto out_warn_father; + } + #else if (!lxc_setgroups(0, NULL)) goto out_warn_father; + #endif if (!lxc_switch_uid_gid(new_uid, new_gid)) goto out_warn_father; @@ -1383,6 +1762,19 @@ static int do_start(void *data) goto out_warn_father; } +#ifdef HAVE_ISULAD + /* isulad: drop the cap of current process */ + if (prctl(PR_SET_KEEPCAPS, 0) < 0) { + SYSERROR("Failed to clear permitted capabilities"); + goto out_warn_father; + } + + if (lxc_drop_caps(handler->conf)) { + SYSERROR("Failed to drop caps"); + goto out_warn_father; + } +#endif + if (handler->conf->monitor_signal_pdeath != SIGKILL) { ret = lxc_set_death_signal(handler->conf->monitor_signal_pdeath, handler->monitor_pid, status_fd); @@ -1393,20 +1785,25 @@ static int do_start(void *data) } } - /* - * After this call, we are in error because this ops should not return + /* After this call, we are in error because this ops should not return * as it execs. */ +#ifdef HAVE_ISULAD + close_prot_errno_disarm(status_fd); + handler->ops->start(handler, handler->data, handler->daemonize ? 
handler->conf->errpipe[1] : -1); +#else handler->ops->start(handler, handler->data); +#endif out_warn_father: - /* - * We want the parent to know something went wrong, so we return a + /* We want the parent to know something went wrong, so we return a * special error code. */ lxc_sync_wake_parent(handler, LXC_SYNC_ERROR); out_error: + close_prot_errno_disarm(devnull_fd); + return -1; } @@ -1435,9 +1832,9 @@ static int lxc_recv_ttys_from_child(struct lxc_handler *handler) tty = &ttys->tty[i]; tty->busy = -1; - tty->ptmx = ttyfds[0]; - tty->pts = ttyfds[1]; - TRACE("Received pty with ptmx fd %d and pts fd %d from child", tty->ptmx, tty->pts); + tty->master = ttyfds[0]; + tty->slave = ttyfds[1]; + TRACE("Received pty with master fd %d and slave fd %d from child", tty->master, tty->slave); } if (ret < 0) @@ -1529,6 +1926,94 @@ static inline int do_share_ns(void *arg) return 0; } +#ifdef HAVE_ISULAD +static int lxc_write_container_info(char *filename, pid_t pid, pid_t p_pid, + unsigned long long start_at, unsigned long long p_start_at) +{ + FILE *pid_fp = NULL; + int ret = 0; + + pid_fp = lxc_fopen(filename, "w"); + if (pid_fp == NULL) { + SYSERROR("Failed to create pidfile '%s'",filename); + ret = -1; + goto out; + } + + if (fprintf(pid_fp, "%d %llu %d %llu\n", pid, start_at, p_pid, p_start_at) < 0) { + SYSERROR("Failed to write '%s'", filename); + ret = -1; + goto out; + } +out: + if (pid_fp) + fclose(pid_fp); + pid_fp = NULL; + return ret; +} + +static int lxc_check_container_info(char *filename, pid_t pid, pid_t p_pid, + unsigned long long start_at, unsigned long long p_start_at) +{ + int ret = 0; + int num; + char sbuf[1024] = {0}; /* bufs for stat */ + int saved_pid; /* process id */ + int saved_ppid; /* pid of parent process */ + unsigned long long saved_start_time; /* start time of process -- seconds since 1-1-70 */ + unsigned long long saved_pstart_time; /* start time of parent process -- seconds since 1-1-70 */ + + if ((lxc_file2str(filename, sbuf, 
sizeof(sbuf))) == -1) { + SYSERROR("Failed to read pidfile %s", filename); + ret = -1; + goto out; + } + + num = sscanf(sbuf, "%d %Lu %d %Lu", &saved_pid, &saved_start_time, &saved_ppid, &saved_pstart_time); + if (num != 4) { + SYSERROR("Call sscanf error"); + ret = -1; + goto out; + } + + if (pid != saved_pid || p_pid != saved_ppid + || start_at != saved_start_time || p_start_at != saved_pstart_time) { + ERROR("Check container info failed"); + ret = -1; + goto out; + } + +out: + return ret; +} + +/* isulad: save pid/ppid info */ +static int lxc_save_container_info(char *filename, pid_t pid) +{ + int ret = 0; + pid_t p_pid = 0; + unsigned long long start_at = 0; + unsigned long long p_start_at = 0; + + start_at = lxc_get_process_startat(pid); + p_pid = getpid(); + p_start_at = lxc_get_process_startat(p_pid); + + ret = lxc_write_container_info(filename, pid, p_pid, start_at, p_start_at); + if (ret != 0) { + goto out; + } + + ret = lxc_check_container_info(filename, pid, p_pid, start_at, p_start_at); + if (ret != 0) { + goto out; + } + +out: + return ret; +} +#endif + /* lxc_spawn() performs crucial setup tasks and clone()s the new process which * exec()s the requested container binary. * Note that lxc_spawn() runs in the parent namespaces.
Any operations performed @@ -1640,6 +2125,32 @@ static int lxc_spawn(struct lxc_handler *handler) } TRACE("Cloned child process %d", handler->pid); +#ifdef HAVE_ISULAD + /* isulad: close pipe after clone */ + if (handler->conf->console.pipes[0][0] >= 0) { + close(handler->conf->console.pipes[0][0]); + handler->conf->console.pipes[0][0] = -1; + } + + if (handler->conf->console.pipes[1][1] >= 0) { + close(handler->conf->console.pipes[1][1]); + handler->conf->console.pipes[1][1] = -1; + } + + if (handler->conf->console.pipes[2][1] >= 0) { + close(handler->conf->console.pipes[2][1]); + handler->conf->console.pipes[2][1] = -1; + } + + /* isulad: save pid/ppid info into file*/ + if (handler->conf->container_info_file) { + if (lxc_save_container_info(handler->conf->container_info_file, handler->pid)) { + ERROR("Failed to save cloned container pid"); + goto out_delete_net; + } + } +#endif + /* Verify that we can actually make use of pidfds. */ if (!lxc_can_use_pidfd(handler->pidfd)) close_prot_errno_disarm(handler->pidfd); @@ -1652,6 +2163,13 @@ static int lxc_spawn(struct lxc_handler *handler) if (ret < 0) SYSERROR("Failed to set environment variable: LXC_PID=%s", pidstr); +#ifdef HAVE_ISULAD + if (handler->cgroup_ops->container_cgroup) { + if (setenv("LXC_CGROUP_PATH", handler->cgroup_ops->container_cgroup, 1)) + SYSERROR("Failed to set environment variable: LXC_CGROUP_PATH=%s.", handler->cgroup_ops->container_cgroup); + } +#endif + for (i = 0; i < LXC_NS_MAX; i++) if (handler->ns_on_clone_flags & ns_info[i].clone_flag) INFO("Cloned %s", ns_info[i].flag_name); @@ -1765,7 +2283,11 @@ static int lxc_spawn(struct lxc_handler *handler) goto out_delete_net; if (!lxc_list_empty(&conf->limits)) { +#ifdef HAVE_ISULAD + ret = setup_resource_limits(&conf->limits, handler->pid, conf->errpipe[1]); +#else ret = setup_resource_limits(&conf->limits, handler->pid); +#endif if (ret < 0) { ERROR("Failed to setup resource limits"); goto out_delete_net; @@ -1776,12 +2298,7 @@ static int 
lxc_spawn(struct lxc_handler *handler) if (ret < 0) goto out_delete_net; - /* - * with isolation the limiting devices cgroup was already setup, so - * only setup devices here if we have no namespace directory - */ - if (!handler->conf->cgroup_meta.namespace_dir && - !cgroup_ops->setup_limits_legacy(cgroup_ops, handler->conf, true)) { + if (!cgroup_ops->setup_limits_legacy(cgroup_ops, handler->conf, true)) { ERROR("Failed to setup legacy device cgroup controller limits"); goto out_delete_net; } @@ -1816,6 +2333,26 @@ static int lxc_spawn(struct lxc_handler *handler) ERROR("Failed to run lxc.hook.start-host"); goto out_delete_net; } +#ifdef HAVE_ISULAD + /* isulad: Run oci prestart hook at here */ + ret = run_oci_hooks(name, "oci-prestart", conf, lxcpath); + if (ret < 0) { + ERROR("Failed to run oci prestart hooks"); + goto out_delete_net; + } + + if (START_TIMEOUT == global_timeout_state) { + lxc_write_error_message(conf->errpipe[1], "Starting the container \"%s\" timeout.", name); + ERROR("Starting the container \"%s\" timeout.", name); + goto out_delete_net; + } + + /* Tell the child to continue its initialization. We'll get + * LXC_SYNC_POST_OCI_PRESTART_HOOK when it is ready for us to run oci prestart hooks. + */ + if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_OCI_PRESTART_HOOK)) + goto out_delete_net; +#endif /* Tell the child to complete its initialization and wait for it to exec * or return an error. 
(The child will never return @@ -1859,6 +2396,22 @@ static int lxc_spawn(struct lxc_handler *handler) if (ret < 0) goto out_abort; +#ifdef HAVE_ISULAD + /* isulad: run oci poststart hooks here */ + ret = run_oci_hooks(name, "oci-poststart", conf, lxcpath); + if (ret < 0) { + ERROR("Failed to run oci poststart hooks"); + goto out_abort; + } + + if (START_TIMEOUT == global_timeout_state) { + lxc_write_error_message(conf->errpipe[1], "Starting the container \"%s\" timeout.", name); + ERROR("Starting the container \"%s\" timeout.", name); + goto out_abort; + } + +#endif + ret = lxc_set_state(name, handler, RUNNING); if (ret < 0) { ERROR("Failed to set state to \"%s\"", lxc_state2str(RUNNING)); @@ -1883,9 +2436,82 @@ out_sync_fini: return -1; } +#ifdef HAVE_ISULAD +/* isulad: start timeout thread function */ +static void* wait_start_timeout(void *arg) +{ + struct start_timeout_conf *conf = (struct start_timeout_conf *)arg; + + sem_post(&global_timeout_sem); + + if (!conf || conf->timeout < 1) + goto out; + + sleep(conf->timeout); + + global_timeout_state = START_TIMEOUT; + +out: + free(conf); + return ((void *)0); +} + +/* isulad: create start timeout thread */ +static int create_start_timeout_thread(struct lxc_conf *conf, unsigned int start_timeout) +{ + int ret = 0; + pthread_t ptid; + pthread_attr_t attr; + struct start_timeout_conf *timeout_conf = NULL; + + if (sem_init(&global_timeout_sem, 0, 0)) { + ERROR("Failed to init start timeout semaphore");/*lint !e613*/ + ret = -1; + return ret; + } + + timeout_conf = malloc(sizeof(struct start_timeout_conf)); + if (timeout_conf == NULL) { + ERROR("Failed to malloc start timeout conf"); + ret = -1; + goto out; + } + + memset(timeout_conf, 0, sizeof(struct start_timeout_conf)); + timeout_conf->errfd = conf->errpipe[1]; + timeout_conf->timeout = start_timeout; + + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + ret = pthread_create(&ptid, &attr, wait_start_timeout, timeout_conf); +
if (ret != 0) { + ERROR("Create start wait timeout thread failed"); + free(timeout_conf); + goto out; + } + + sem_wait(&global_timeout_sem); +out: + sem_destroy(&global_timeout_sem); + return ret; +} + +// isulad: send '128 + signal' if container is killed by signal. +#define EXIT_SIGNAL_OFFSET 128 +#endif + + +#ifdef HAVE_ISULAD +int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops, + void *data, const char *lxcpath, bool daemonize, int *error_num, + unsigned int start_timeout) +{ + int exit_code; +#else int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops, void *data, const char *lxcpath, bool daemonize, int *error_num) { +#endif int ret, status; const char *name = handler->name; struct lxc_conf *conf = handler->conf; @@ -1901,6 +2527,16 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops, handler->daemonize = daemonize; cgroup_ops = handler->cgroup_ops; +#ifdef HAVE_ISULAD + /* isulad: add start timeout limit */ + if (start_timeout > 0) { + ret = create_start_timeout_thread(conf, start_timeout); + if (ret) { + ERROR("Failed to create start timeout thread for container \"%s\".", name); + goto out_abort; + } + } +#endif if (!attach_block_device(handler->conf)) { ERROR("Failed to attach block device"); ret = -1; @@ -1935,7 +2571,7 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops, } INFO("Unshared CLONE_NEWNS"); - turn_into_dependent_mounts(); + remount_all_slave(); ret = lxc_setup_rootfs_prepare_root(conf, name, lxcpath); if (ret < 0) { ERROR("Error setting up rootfs mount as root before spawn"); @@ -1959,11 +2595,13 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops, goto out_delete_network; } +#ifndef HAVE_ISULAD if (!handler->init_died && handler->pid > 0) { ERROR("Child process is not killed"); ret = -1; goto out_delete_network; } +#endif status = lxc_wait_for_pid_status(handler->pid); if (status < 0) @@ -1973,6 +2611,21 @@ int __lxc_start(struct 
lxc_handler *handler, struct lxc_operations *ops, * reboot. This should mean it was an lxc-execute which simply exited. * In any case, treat it as a 'halt'. */ +#ifdef HAVE_ISULAD + // isulad: recored log for container init exit + if (WIFSIGNALED(status)) { + int signal = WTERMSIG(status); + signal = WTERMSIG(status); + exit_code = EXIT_SIGNAL_OFFSET + signal; + ERROR("Container \"%s\" init exited with signal %d", name, signal); + } else if (WIFEXITED(status)) { + exit_code = WEXITSTATUS(status); + ERROR("Container \"%s\" init exited with status %d", name, exit_code); + } else { + exit_code = -1; + ERROR("Container \"%s\" init exited with unknown status", name); + } +#else if (WIFSIGNALED(status)) { switch(WTERMSIG(status)) { case SIGINT: /* halt */ @@ -1990,6 +2643,7 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops, break; } } +#endif ret = lxc_restore_phys_nics_to_netns(handler); if (ret < 0) @@ -1997,11 +2651,20 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops, close_prot_errno_disarm(handler->pinfd); +#ifdef HAVE_ISULAD + lxc_monitor_send_exit_code(name, exit_code, handler->lxcpath); +#else lxc_monitor_send_exit_code(name, status, handler->lxcpath); +#endif + lxc_error_set_and_log(handler->pid, status); if (error_num) *error_num = handler->exit_status; +#ifdef HAVE_ISULAD + handler->exit_code = exit_code; /* record exit code */ +#endif + /* These are not the droids you are looking for. 
*/ __private_goto1: lxc_delete_network(handler); @@ -2032,7 +2695,11 @@ struct start_args { char *const *argv; }; +#ifdef HAVE_ISULAD +static int start(struct lxc_handler *handler, void* data, int fd) +#else static int start(struct lxc_handler *handler, void* data) +#endif { struct start_args *arg = data; @@ -2040,6 +2707,9 @@ static int start(struct lxc_handler *handler, void* data) execvp(arg->argv[0], arg->argv); SYSERROR("Failed to exec \"%s\"", arg->argv[0]); +#ifdef HAVE_ISULAD + lxc_write_error_message(fd, "exec: \"%s\": %s.", arg->argv[0], strerror(errno)); +#endif return 0; } @@ -2057,14 +2727,18 @@ static struct lxc_operations start_ops = { }; int lxc_start(char *const argv[], struct lxc_handler *handler, - const char *lxcpath, bool daemonize, int *error_num) + const char *lxcpath, bool daemonize, int *error_num, unsigned int start_timeout) { struct start_args start_arg = { .argv = argv, }; TRACE("Doing lxc_start"); +#ifdef HAVE_ISULAD + return __lxc_start(handler, &start_ops, &start_arg, lxcpath, daemonize, error_num, start_timeout); +#else return __lxc_start(handler, &start_ops, &start_arg, lxcpath, daemonize, error_num); +#endif } static void lxc_destroy_container_on_signal(struct lxc_handler *handler, @@ -2136,3 +2810,261 @@ static bool do_destroy_container(struct lxc_handler *handler) return storage_destroy(handler->conf); } + +#ifdef HAVE_ISULAD +/*isulad: set env for clean resources */ +static int clean_resource_set_env(struct lxc_handler *handler) +{ + const char *name = handler->name; + struct lxc_conf *conf = handler->conf; + char bufstr[PATH_MAX + 1]; + int i = 0; + int j = 0; + int len = 2; //set "LXC_PID" and "LXC_CGNS_AWARE" + + if (conf == NULL || conf->ocihooks == NULL || conf->ocihooks->poststop_len == 0) { + return 0; + } + + if (name) { + len++; + } + if (conf->rcfile) { + len++; + } + if (conf->rootfs.mount) { + len++; + } + if (conf->rootfs.path) { + len++; + } + if (conf->console.path) { + len++; + } + if (conf->console.log_path) { + 
len++; + } + if (handler->cgroup_ops->container_cgroup) { + len++; + } + + for (; i < conf->ocihooks->poststop_len; i++) { + size_t cap = conf->ocihooks->poststop[i]->env_len; + size_t newcap = cap + len + 1; + if (lxc_grow_array((void ***)&(conf->ocihooks->poststop[i]->env), &cap, newcap, 1) != 0) { + return -1; + } + j = conf->ocihooks->poststop[i]->env_len; + /* Start of environment variable setup for hooks. */ + if (name) { + snprintf(bufstr, PATH_MAX + 1, "LXC_NAME=%s", name); + conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr); + } + if (conf->rcfile) { + snprintf(bufstr, PATH_MAX + 1, "LXC_CONFIG_FILE=%s", conf->rcfile); + conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr); + } + if (conf->rootfs.mount) { + snprintf(bufstr, PATH_MAX + 1, "LXC_ROOTFS_MOUNT=%s", conf->rootfs.mount); + conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr); + } + if (conf->rootfs.path) { + snprintf(bufstr, PATH_MAX + 1, "LXC_ROOTFS_PATH=%s", conf->rootfs.path); + conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr); + } + if (conf->console.path) { + snprintf(bufstr, PATH_MAX + 1, "LXC_CONSOLE=%s", conf->console.path); + conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr); + } + if (conf->console.log_path) { + snprintf(bufstr, PATH_MAX + 1, "LXC_CONSOLE_LOGPATH=%s", conf->console.log_path); + conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr); + } + conf->ocihooks->poststop[i]->env[j++] = safe_strdup("LXC_CGNS_AWARE=1"); + + snprintf(bufstr, PATH_MAX + 1, "LXC_PID=%d", handler->pid); + conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr); + if (handler->cgroup_ops->container_cgroup) { + snprintf(bufstr, PATH_MAX + 1, "LXC_CGROUP_PATH=%s", handler->cgroup_ops->container_cgroup); + conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr); + } + conf->ocihooks->poststop[i]->env_len = j; + /* End of environment variable setup for hooks. 
*/ + } + return 0; +} + +/*isulad: init handler for clean */ +static struct lxc_handler *lxc_init_clean_handler(char *name, char *lxcpath, struct lxc_conf *conf, pid_t pid) +{ + int i; + struct lxc_handler *handler; + + handler = malloc(sizeof(*handler)); + if (handler == NULL) + return NULL; + + memset(handler, 0, sizeof(*handler)); + + /* Note that am_guest_unpriv() checks the effective uid. We + * probably don't care if we are real root only if we are running + * as root so this should be fine. + */ + handler->am_root = !am_guest_unpriv(); + handler->data_sock[0] = handler->data_sock[1] = -1; + handler->conf = conf; + handler->lxcpath = lxcpath; + handler->pinfd = -1; + handler->sigfd = -EBADF; + handler->pidfd = -EBADF; + handler->init_died = false; + handler->monitor_status_fd = -EBADF; + handler->pid = pid; + handler->state_socket_pair[0] = handler->state_socket_pair[1] = -1; + if (handler->conf->reboot == REBOOT_NONE) + lxc_list_init(&handler->conf->state_clients); + + for (i = 0; i < LXC_NS_MAX; i++) + handler->nsfd[i] = -1; + + handler->name = name; + handler->exit_code = -1; /* isulad: record exit code of container */ + + handler->cgroup_ops = cgroup_init(conf); + if (!handler->cgroup_ops) { + ERROR("Failed to initialize cgroup driver"); + goto on_error; + } + + INFO("Container \"%s\" 's clean handler is initialized.", name); + + return handler; + +on_error: + lxc_free_handler(handler); + + return NULL; +} + +/*isulad: init handler for clean */ +static struct lxc_handler *lxc_init_pids_handler(char *name, char *lxcpath, struct lxc_conf *conf) +{ + int i; + struct lxc_handler *handler; + + handler = malloc(sizeof(*handler)); + if (handler == NULL) + return NULL; + + memset(handler, 0, sizeof(*handler)); + + /* Note that am_guest_unpriv() checks the effective uid. We + * probably don't care if we are real root only if we are running + * as root so this should be fine. 
+ */ + handler->am_root = !am_guest_unpriv(); + handler->data_sock[0] = handler->data_sock[1] = -1; + handler->conf = conf; + handler->lxcpath = lxcpath; + handler->pinfd = -1; + handler->sigfd = -EBADF; + handler->init_died = false; + handler->state_socket_pair[0] = handler->state_socket_pair[1] = -1; + handler->monitor_status_fd = -EBADF; + handler->pidfd = -EBADF; + if (handler->conf->reboot == REBOOT_NONE) + lxc_list_init(&handler->conf->state_clients); + + for (i = 0; i < LXC_NS_MAX; i++) + handler->nsfd[i] = -1; + + handler->name = name; + handler->exit_code = -1; /* isulad: record exit code of container */ + + handler->cgroup_ops = cgroup_init(conf); + if (!handler->cgroup_ops) { + ERROR("Failed to initialize cgroup driver"); + goto on_error; + } + + INFO("Container \"%s\" 's clean handler is initialized.", name); + + return handler; + +on_error: + lxc_free_handler(handler); + + return NULL; +} + +/*isulad: do_lxcapi_clean_resource */ +int do_lxcapi_clean_resource(char *name, char *lxcpath, struct lxc_conf *conf, pid_t pid) +{ + int ret = 0; + struct lxc_handler *handler = NULL; + int retry_count = 0; + int max_retry = 10; + + handler = lxc_init_clean_handler(name, lxcpath, conf, pid); + if (!handler) { + ERROR("Failed to init container %s clean handler", name); + ret = -1; + goto out; + } + + if (clean_resource_set_env(handler) != 0) { + ERROR("Failed to set env for poststop hooks"); + ret = -1; + goto out; + } + + if (run_oci_hooks(handler->name, "oci-poststop", handler->conf, handler->lxcpath)) { + ERROR("Failed to run lxc.hook.post-stop for container \"%s\".", handler->name); + ret = -1; + } + +retry: + if (!handler->cgroup_ops->payload_destroy(handler->cgroup_ops, handler)) { + TRACE("Trying to kill all subprocess"); + signal_all_processes(handler); + TRACE("Finished kill all subprocess"); + if (retry_count < max_retry) { + usleep(100 * 1000); /* 100 millisecond */ + retry_count++; + goto retry; + } + SYSERROR("Failed to destroy cgroup path for 
container: \"%s\"", handler->name); + ret = -1; + } + +out: + lxc_free_handler(handler); + return ret; +} + +/*isulad: do_lxcapi_get_pids */ +int do_lxcapi_get_pids(char *name, char *lxcpath, struct lxc_conf *conf, pid_t **pids,size_t *pids_len) +{ + int ret = 0; + struct lxc_handler *handler = NULL; + struct cgroup_ops *cg_ops = NULL; + + handler = lxc_init_pids_handler(name, lxcpath, conf); + if (!handler) { + ERROR("Failed to init container %s clean handler", name); + ret = -1; + goto out; + } + + cg_ops = handler->cgroup_ops; + ret = get_all_pids(cg_ops, pids, pids_len); + if (ret < 0) { + WARN("failed to get all pids"); + } + +out: + lxc_free_handler(handler); + return ret; +} + +#endif diff --git a/src/lxc/start.h b/src/lxc/start.h index ece4aac47..ebeeb72ea 100644 --- a/src/lxc/start.h +++ b/src/lxc/start.h @@ -10,7 +10,6 @@ #include #include "conf.h" -#include "macro.h" #include "namespace.h" #include "state.h" @@ -124,8 +123,14 @@ struct lxc_handler { struct cgroup_ops *cgroup_ops; - /* Internal fds that always need to stay open. */ - int keep_fds[3]; +#ifdef HAVE_ISULAD + int exit_code;/* isulad: record the exit code of container */ + /* Indicates whether should we using pipes or pty dup to std{in,out,err} for console log. */ + bool disable_pty; + /* Indicates whether should we keep stdin active. 
*/ + bool open_stdin; +#endif + }; struct execute_args { @@ -136,7 +141,11 @@ struct execute_args { }; struct lxc_operations { +#ifdef HAVE_ISULAD + int (*start)(struct lxc_handler *, void *, int); +#else int (*start)(struct lxc_handler *, void *); +#endif int (*post_start)(struct lxc_handler *, void *); }; @@ -147,11 +156,12 @@ extern int lxc_serve_state_clients(const char *name, struct lxc_handler *handler, lxc_state_t state); extern void lxc_abort(struct lxc_handler *handler); -extern struct lxc_handler *lxc_init_handler(struct lxc_handler *old, - const char *name, +extern struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf, - const char *lxcpath, bool daemonize); -extern void lxc_put_handler(struct lxc_handler *handler); + const char *lxcpath, + bool daemonize); +extern void lxc_zero_handler(struct lxc_handler *handler); +extern void lxc_free_handler(struct lxc_handler *handler); extern int lxc_init(const char *name, struct lxc_handler *handler); extern void lxc_end(struct lxc_handler *handler); @@ -164,14 +174,23 @@ extern void lxc_end(struct lxc_handler *handler); */ extern int lxc_check_inherited(struct lxc_conf *conf, bool closeall, int *fds_to_ignore, size_t len_fds); -static inline int inherit_fds(struct lxc_handler *handler, bool closeall) -{ - return lxc_check_inherited(handler->conf, closeall, handler->keep_fds, - ARRAY_SIZE(handler->keep_fds)); -} +#ifdef HAVE_ISULAD +extern int __lxc_start(struct lxc_handler *handler, + struct lxc_operations* ops, void *data, const char *lxcpath, + bool daemonize, int *error_num, unsigned int start_timeout); +#else extern int __lxc_start(struct lxc_handler *, struct lxc_operations *, void *, const char *, bool, int *); +#endif extern int resolve_clone_flags(struct lxc_handler *handler); +#ifdef HAVE_ISULAD +/*isulad: do_lxcapi_clean_resource */ +extern int do_lxcapi_clean_resource(char *name, char *lxcpath, struct lxc_conf *conf, pid_t pid); + +/*isulad: do_lxcapi_get_pids */ +extern int 
do_lxcapi_get_pids(char *name, char *lxcpath, struct lxc_conf *conf, pid_t **pids,size_t *pids_len); +#endif + #endif diff --git a/src/lxc/storage/block.c b/src/lxc/storage/block.c new file mode 100644 index 000000000..eb75e7065 --- /dev/null +++ b/src/lxc/storage/block.c @@ -0,0 +1,86 @@ +/* + * lxc: linux Container library + * + * (C) Copyright IBM Corp. 2007, 2008 + * + * Authors: + * Daniel Lezcano + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 +#endif +#include +#include + +#include "config.h" +#include "log.h" +#include "storage.h" +#include "storage_utils.h" +#include "utils.h" + +lxc_log_define(blk, lxc); + +int blk_destroy(struct lxc_storage *orig) +{ + return 0; +} + +bool blk_detect(const char *path) +{ + struct stat statbuf; + int ret; + + if (!strncmp(path, "blk:", 4)) + return true; + + ret = stat(path, &statbuf); + if (ret == -1 && errno == EPERM) { + SYSERROR("blk_detect: failed to look at \"%s\"", path); + return false; + } + + if (ret == 0 && S_ISBLK(statbuf.st_mode)) + return true; + + return false; +} + +int blk_mount(struct lxc_storage *bdev) +{ + const char *src; + if (strcmp(bdev->type, "blk")) + return -22; + + if (!bdev->src || !bdev->dest) + return -22; + + src = lxc_storage_get_path(bdev->src, 
bdev->type); + + return mount_unknown_fs(src, bdev->dest, bdev->mntopts); +} + +int blk_umount(struct lxc_storage *bdev) +{ + if (strcmp(bdev->type, "blk")) + return -22; + + if (!bdev->src || !bdev->dest) + return -22; + + return umount(bdev->dest); +} diff --git a/src/lxc/storage/block.h b/src/lxc/storage/block.h new file mode 100644 index 000000000..2fa7565fb --- /dev/null +++ b/src/lxc/storage/block.h @@ -0,0 +1,41 @@ +/* + * lxc: linux Container library + * + * (C) Copyright IBM Corp. 2007, 2008 + * + * Authors: + * Daniel Lezcano + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef __LXC_BLK_H +#define __LXC_BLK_H + +#include +#include + +struct lxc_storage; + +struct bdev_specs; + +struct lxc_conf; + +extern int blk_destroy(struct lxc_storage *orig); +extern bool blk_detect(const char *path); +extern int blk_mount(struct lxc_storage *bdev); +extern int blk_umount(struct lxc_storage *bdev); + +#endif /* __LXC_BLK_H */ diff --git a/src/lxc/storage/btrfs.c b/src/lxc/storage/btrfs.c index 92a4a6def..069a9dd84 100644 --- a/src/lxc/storage/btrfs.c +++ b/src/lxc/storage/btrfs.c @@ -197,16 +197,27 @@ int btrfs_mount(struct lxc_storage *bdev) const char *src; int ret; +#ifdef HAVE_ISULAD + unsigned long pflags = 0; +#endif + if (strcmp(bdev->type, "btrfs")) return -22; if (!bdev->src || !bdev->dest) return -22; +#ifdef HAVE_ISULAD + if (parse_mntopts(bdev->mntopts, &mntflags, &pflags, &mntdata) < 0) { + free(mntdata); + return -22; + } +#else if (parse_mntopts(bdev->mntopts, &mntflags, &mntdata) < 0) { free(mntdata); return -22; } +#endif src = lxc_storage_get_path(bdev->src, "btrfs"); diff --git a/src/lxc/storage/dir.c b/src/lxc/storage/dir.c index 18a10a42f..485572a0b 100644 --- a/src/lxc/storage/dir.c +++ b/src/lxc/storage/dir.c @@ -94,6 +94,9 @@ int dir_create(struct lxc_storage *bdev, const char *dest, const char *n, int dir_destroy(struct lxc_storage *orig) { +#ifdef HAVE_ISULAD + // isulad: do not destroy rootfs for directory, it should be managed by caller +#else int ret; const char *src; @@ -102,6 +105,7 @@ int dir_destroy(struct lxc_storage *orig) ret = lxc_rmdir_onedev(src, NULL); if (ret < 0) return log_error_errno(ret, errno, "Failed to delete \"%s\"", src); +#endif return 0; } @@ -124,6 +128,35 @@ bool dir_detect(const char *path) return false; } +#ifdef HAVE_ISULAD +int dir_mount(struct lxc_storage 
*bdev) +{ + __do_free char *mntdata = NULL; + unsigned long mntflags = 0, pflags = 0; + int ret; + const char *src; + + if (strcmp(bdev->type, "dir")) + return -22; + + if (!bdev->src || !bdev->dest) + return -22; + + ret = parse_mntopts(bdev->mntopts, &mntflags, &pflags, &mntdata); + if (ret < 0) + return log_error_errno(ret, errno, "Failed to parse mount options \"%s\"", bdev->mntopts); + + src = lxc_storage_get_path(bdev->src, bdev->type); + + ret = mount(src, bdev->dest, "bind", MS_BIND | MS_REC | (mntflags & ~MS_RDONLY) | pflags, mntdata); + if (ret < 0) { + return log_error_errno(-errno, errno, "Failed to mount \"%s\" on \"%s\"", src, bdev->dest); + } + TRACE("Mounted \"%s\" on \"%s\"", src, bdev->dest); + + return 0; +} +#else int dir_mount(struct lxc_storage *bdev) { __do_free char *mntdata = NULL; @@ -161,11 +194,12 @@ int dir_mount(struct lxc_storage *bdev) DEBUG("Remounted \"%s\" on \"%s\" read-only with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"", src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags); } - TRACE("Mounted \"%s\" on \"%s\" with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"", src ? src : "(none)", bdev->dest ? 
bdev->dest : "(none)", mntdata, mflags, pflags); + return 0; } +#endif int dir_umount(struct lxc_storage *bdev) { diff --git a/src/lxc/storage/loop.c b/src/lxc/storage/loop.c index eebc1b67c..345be503b 100644 --- a/src/lxc/storage/loop.c +++ b/src/lxc/storage/loop.c @@ -21,6 +21,7 @@ #include "memory_utils.h" #include "storage.h" #include "storage_utils.h" +#include "lxclock.h" #include "utils.h" lxc_log_define(loop, lxc); @@ -216,9 +217,11 @@ bool loop_detect(const char *path) int loop_mount(struct lxc_storage *bdev) { - int ret, loopfd; + int ret = 0; + int loopfd, lret; char loname[PATH_MAX]; const char *src; + struct lxc_lock *l = NULL; if (strcmp(bdev->type, "loop")) return -22; @@ -226,13 +229,29 @@ int loop_mount(struct lxc_storage *bdev) if (!bdev->src || !bdev->dest) return -22; + /* isulad: take the lock before mounting, so we avoid using a loop device that is in use by + * other starting containers */ + l = lxc_newlock("mount_lock", "mount_lock"); + if (!l) { + SYSERROR("create file lock error when mount fs"); + return -1; + } + + lret = lxclock(l, 0); + if (lret) { + SYSERROR("try to lock failed when mount fs"); + ret = -1; + goto out; + } + /* skip prefix */ src = lxc_storage_get_path(bdev->src, bdev->type); loopfd = lxc_prepare_loop_dev(src, loname, LO_FLAGS_AUTOCLEAR); if (loopfd < 0) { ERROR("Failed to prepare loop device for loop file \"%s\"", src); - return -1; + ret = -1; + goto out; } DEBUG("Prepared loop device \"%s\"", loname); @@ -241,14 +260,21 @@ int loop_mount(struct lxc_storage *bdev) ERROR("Failed to mount rootfs \"%s\" on \"%s\" via loop device \"%s\"", bdev->src, bdev->dest, loname); close(loopfd); - return -1; + ret = -1; + goto out; } bdev->lofd = loopfd; DEBUG("Mounted rootfs \"%s\" on \"%s\" via loop device \"%s\"", bdev->src, bdev->dest, loname); - - return 0; +out: + lret = lxcunlock(l); + if (lret) { + SYSERROR("try to unlock failed when mount fs"); + ret = -1; + } + lxc_putlock(l); + return ret; } int loop_umount(struct lxc_storage *bdev) diff
--git a/src/lxc/storage/overlay.c b/src/lxc/storage/overlay.c index 770785cfd..75a81de15 100644 --- a/src/lxc/storage/overlay.c +++ b/src/lxc/storage/overlay.c @@ -349,6 +349,9 @@ int ovl_mount(struct lxc_storage *bdev) char *work, *lastslash; size_t len, len2; int ret, ret2; +#ifdef HAVE_ISULAD + unsigned long pflags = 0; +#endif if (strcmp(bdev->type, "overlay") && strcmp(bdev->type, "overlayfs")) return -22; @@ -414,7 +417,12 @@ int ovl_mount(struct lxc_storage *bdev) work = must_make_path(upper, LXC_OVERLAY_WORK_DIR, NULL); upper[lastslash - upper] = '/'; +#ifdef HAVE_ISULAD + ret = parse_mntopts(bdev->mntopts, &mntflags, &pflags, &mntdata); +#else ret = parse_mntopts(bdev->mntopts, &mntflags, &mntdata); +#endif + if (ret < 0) { ERROR("Failed to parse mount options"); free(mntdata); diff --git a/src/lxc/storage/rsync.c b/src/lxc/storage/rsync.c index 2e4df2537..97678dea2 100644 --- a/src/lxc/storage/rsync.c +++ b/src/lxc/storage/rsync.c @@ -78,8 +78,12 @@ int lxc_rsync(struct rsync_data *data) return -1; } - if (detect_shared_rootfs() && mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) - SYSERROR("Failed to recursively turn root mount tree into dependent mount"); + ret = detect_shared_rootfs(); + if (ret) { + ret = mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL); + if (ret < 0) + SYSERROR("Failed to make \"/\" a slave mount"); + } ret = orig->ops->mount(orig); if (ret < 0) { diff --git a/src/lxc/storage/storage.c b/src/lxc/storage/storage.c index 3f1b713f6..5291b244b 100644 --- a/src/lxc/storage/storage.c +++ b/src/lxc/storage/storage.c @@ -41,6 +41,7 @@ #include "storage_utils.h" #include "utils.h" #include "zfs.h" +#include "block.h" #ifndef HAVE_STRLCPY #include "include/strlcpy.h" @@ -94,6 +95,22 @@ static const struct lxc_storage_ops loop_ops = { .can_backup = true, }; +#ifdef HAVE_ISULAD +/* block */ +static const struct lxc_storage_ops blk_ops = { + .detect = &blk_detect, + .mount = &blk_mount, + .umount = &blk_umount, + .clone_paths = NULL, + .destroy = 
&blk_destroy, + .create = NULL, + .copy = NULL, + .snapshot = NULL, + .can_snapshot = false, + .can_backup = true, +}; +#endif + /* lvm */ static const struct lxc_storage_ops lvm_ops = { .detect = &lvm_detect, @@ -179,6 +196,10 @@ static const struct lxc_storage_type bdevs[] = { { .name = "overlayfs", .ops = &ovl_ops, }, { .name = "loop", .ops = &loop_ops, }, { .name = "nbd", .ops = &nbd_ops, }, +#ifdef HAVE_ISULAD + //isulad: block device + { .name = "blk", .ops = &blk_ops, } +#endif }; static const size_t numbdevs = sizeof(bdevs) / sizeof(struct lxc_storage_type); @@ -570,9 +591,15 @@ bool storage_destroy(struct lxc_conf *conf) int destroy_rv = 0; r = storage_init(conf); +#ifdef HAVE_ISULAD + if (r == NULL) { + WARN("%s 's storage init failed, the storage may be deleted already", conf->name); + return true; + } +#else if (!r) return ret; - +#endif destroy_rv = r->ops->destroy(r); if (destroy_rv == 0) ret = true; diff --git a/src/lxc/storage/storage_utils.c b/src/lxc/storage/storage_utils.c index f96bd520b..6fec638ea 100644 --- a/src/lxc/storage/storage_utils.c +++ b/src/lxc/storage/storage_utils.c @@ -165,8 +165,11 @@ int detect_fs(struct lxc_storage *bdev, char *type, int len) if (unshare(CLONE_NEWNS) < 0) _exit(EXIT_FAILURE); - if (detect_shared_rootfs() && mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) - SYSERROR("Failed to recursively turn root mount tree into dependent mount. 
Continuing..."); + if (detect_shared_rootfs()) + if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) { + SYSERROR("Failed to make / rslave"); + ERROR("Continuing..."); + } ret = mount_unknown_fs(srcdev, bdev->dest, bdev->mntopts); if (ret < 0) { @@ -256,10 +259,14 @@ int is_blktype(struct lxc_storage *b) return 0; } +// isulad: recored error +static char **mount_errors = NULL; + int mount_unknown_fs(const char *rootfs, const char *target, const char *options) { size_t i; + char *errs = NULL; int ret; struct cbarg { const char *rootfs; @@ -288,15 +295,30 @@ int mount_unknown_fs(const char *rootfs, const char *target, ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg); if (ret < 0) { ERROR("Failed to parse \"%s\"", fsfile[i]); + lxc_free_array((void**)mount_errors, free); + mount_errors = NULL; return -1; } - if (ret) + if (ret) { + lxc_free_array((void**)mount_errors, free); + mount_errors = NULL; return 0; + } } - ERROR("Failed to determine FSType for \"%s\"", rootfs); + if (mount_errors != NULL) { + errs = lxc_string_join("\n", (const char **)mount_errors, false); + if (errs == NULL) { + ERROR("failed to join mount errors"); + } + } + ERROR("Failed to determine FSType for \"%s\": %s", rootfs, errs ? 
errs : "unknown reason"); + + free(errs); + lxc_free_array((void**)mount_errors, free); + mount_errors = NULL; return -1; } @@ -315,6 +337,12 @@ int find_fstype_cb(char *buffer, void *data) unsigned long mntflags = 0; char *mntdata = NULL; char *fstype; + char mount_err[BUFSIZ] = {0}; + int ret; + +#ifdef HAVE_ISULAD + unsigned long pflags = 0; +#endif /* we don't try 'nodev' entries */ if (strstr(buffer, "nodev")) @@ -327,14 +355,34 @@ int find_fstype_cb(char *buffer, void *data) DEBUG("Trying to mount \"%s\"->\"%s\" with FSType \"%s\"", cbarg->rootfs, cbarg->target, fstype); +#ifdef HAVE_ISULAD + if (parse_mntopts(cbarg->options, &mntflags, &pflags, &mntdata) < 0) { + free(mntdata); + return 0; + } + + if (mount(cbarg->rootfs, cbarg->target, fstype, (mntflags & ~MS_RDONLY), mntdata)) { +#else if (parse_mntopts(cbarg->options, &mntflags, &mntdata) < 0) { free(mntdata); return 0; } if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) { +#endif SYSDEBUG("Failed to mount"); free(mntdata); + // isulad: recored error + ret = snprintf(mount_err, BUFSIZ, "\t\tmount %s onto %s with FSType %s failed: %s", + cbarg->rootfs, cbarg->target, fstype, strerror(errno)); + if (ret < 0 || (size_t)ret >= BUFSIZ) { + ERROR("failed to format output mount error"); + return 0; + } + + if (lxc_append_string(&mount_errors, mount_err) < 0) { + ERROR("failed to append mount error"); + } return 0; } diff --git a/src/lxc/storage/zfs.c b/src/lxc/storage/zfs.c index ee9e32d0a..025cf956f 100644 --- a/src/lxc/storage/zfs.c +++ b/src/lxc/storage/zfs.c @@ -159,23 +159,33 @@ bool zfs_detect(const char *path) int zfs_mount(struct lxc_storage *bdev) { - __do_free char *mntdata = NULL; unsigned long mntflags = 0; + char *mntdata = NULL; int ret; size_t oldlen, newlen, totallen; char *tmp; const char *src; char cmd_output[PATH_MAX] = {0}; +#ifdef HAVE_ISULAD + unsigned long pflags = 0; +#endif + if (strcmp(bdev->type, "zfs")) return -22; if (!bdev->src || !bdev->dest) return -22; +#ifdef 
HAVE_ISULAD + ret = parse_mntopts(bdev->mntopts, &mntflags, &pflags, &mntdata); +#else ret = parse_mntopts(bdev->mntopts, &mntflags, &mntdata); +#endif + if (ret < 0) { ERROR("Failed to parse mount options"); + free(mntdata); return -22; } @@ -220,6 +230,7 @@ int zfs_mount(struct lxc_storage *bdev) tmp = realloc(mntdata, totallen); if (!tmp) { ERROR("Failed to reallocate memory"); + free(mntdata); return -1; } mntdata = tmp; @@ -227,10 +238,12 @@ int zfs_mount(struct lxc_storage *bdev) ret = snprintf((mntdata + oldlen), newlen, ",zfsutil,mntpoint=%s", src); if (ret < 0 || (size_t)ret >= newlen) { ERROR("Failed to create string"); + free(mntdata); return -1; } ret = mount(src, bdev->dest, "zfs", mntflags, mntdata); + free(mntdata); if (ret < 0 && errno != EBUSY) { SYSERROR("Failed to mount \"%s\" on \"%s\"", src, bdev->dest); return -1; diff --git a/src/lxc/string_utils.c b/src/lxc/string_utils.c index dcb1160e4..9118add02 100644 --- a/src/lxc/string_utils.c +++ b/src/lxc/string_utils.c @@ -501,6 +501,7 @@ int lxc_grow_array(void ***array, size_t *capacity, size_t new_size, size_t capa /* first time around, catch some trivial mistakes of the user * only initializing one of these */ if (!*array || !*capacity) { + free(*array); *array = NULL; *capacity = 0; } diff --git a/src/lxc/sync.h b/src/lxc/sync.h index ff7a1eb18..56c1dfcfd 100644 --- a/src/lxc/sync.h +++ b/src/lxc/sync.h @@ -11,6 +11,10 @@ enum { LXC_SYNC_POST_CONFIGURE, LXC_SYNC_CGROUP, LXC_SYNC_CGROUP_UNSHARE, +#ifdef HAVE_ISULAD + LXC_SYNC_OCI_PRESTART_HOOK, + LXC_SYNC_POST_OCI_PRESTART_HOOK, +#endif LXC_SYNC_CGROUP_LIMITS, LXC_SYNC_READY_START, LXC_SYNC_RESTART, diff --git a/src/lxc/syscall_numbers.h b/src/lxc/syscall_numbers.h index bfd0e57ab..42609d43f 100644 --- a/src/lxc/syscall_numbers.h +++ b/src/lxc/syscall_numbers.h @@ -35,12 +35,10 @@ #define __NR_keyctl 280 #elif defined __powerpc__ #define __NR_keyctl 271 - #elif defined __riscv - #define __NR_keyctl 219 #elif defined __sparc__ #define 
__NR_keyctl 283 #elif defined __ia64__ - #define __NR_keyctl (249 + 1024) + #define __NR_keyctl 249 #elif defined _MIPS_SIM #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ #define __NR_keyctl 4282 @@ -70,8 +68,6 @@ #define __NR_memfd_create 350 #elif defined __powerpc__ #define __NR_memfd_create 360 - #elif defined __riscv - #define __NR_memfd_create 279 #elif defined __sparc__ #define __NR_memfd_create 348 #elif defined __blackfin__ @@ -107,12 +103,10 @@ #define __NR_pivot_root 217 #elif defined __powerpc__ #define __NR_pivot_root 203 - #elif defined __riscv - #define __NR_pivot_root 41 #elif defined __sparc__ #define __NR_pivot_root 146 #elif defined __ia64__ - #define __NR_pivot_root (183 + 1024) + #define __NR_pivot_root 183 #elif defined _MIPS_SIM #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ #define __NR_pivot_root 4216 @@ -142,12 +136,10 @@ #define __NR_setns 339 #elif defined __powerpc__ #define __NR_setns 350 - #elif defined __riscv - #define __NR_setns 268 #elif defined __sparc__ #define __NR_setns 337 #elif defined __ia64__ - #define __NR_setns (306 + 1024) + #define __NR_setns 306 #elif defined _MIPS_SIM #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ #define __NR_setns 4344 @@ -177,12 +169,10 @@ #define __NR_sethostname 74 #elif defined __powerpc__ #define __NR_sethostname 74 - #elif defined __riscv - #define __NR_sethostname 161 #elif defined __sparc__ #define __NR_sethostname 88 #elif defined __ia64__ - #define __NR_sethostname (59 + 1024) + #define __NR_sethostname 59 #elif defined _MIPS_SIM #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ #define __NR_sethostname 474 @@ -212,12 +202,10 @@ #define __NR_signalfd 316 #elif defined __powerpc__ #define __NR_signalfd 305 - #elif defined __riscv - #define __NR_signalfd 74 #elif defined __sparc__ #define __NR_signalfd 311 #elif defined __ia64__ - #define __NR_signalfd (283 + 1024) + #define __NR_signalfd 283 #elif defined _MIPS_SIM #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ #define __NR_signalfd 4317 @@ -247,12 +235,10 @@ 
#define __NR_signalfd4 322 #elif defined __powerpc__ #define __NR_signalfd4 313 - #elif defined __riscv - #define __NR_signalfd4 74 #elif defined __sparc__ #define __NR_signalfd4 317 #elif defined __ia64__ - #define __NR_signalfd4 (289 + 1024) + #define __NR_signalfd4 289 #elif defined _MIPS_SIM #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ #define __NR_signalfd4 4324 @@ -282,12 +268,10 @@ #define __NR_unshare 303 #elif defined __powerpc__ #define __NR_unshare 282 - #elif defined __riscv - #define __NR_unshare 97 #elif defined __sparc__ #define __NR_unshare 299 #elif defined __ia64__ - #define __NR_unshare (272 + 1024) + #define __NR_unshare 272 #elif defined _MIPS_SIM #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ #define __NR_unshare 4303 @@ -317,12 +301,10 @@ #define __NR_bpf 351 #elif defined __powerpc__ #define __NR_bpf 361 - #elif defined __riscv - #define __NR_bpf 280 #elif defined __sparc__ #define __NR_bpf 349 #elif defined __ia64__ - #define __NR_bpf (317 + 1024) + #define __NR_bpf 317 #elif defined _MIPS_SIM #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ #define __NR_bpf 4355 @@ -352,12 +334,10 @@ #define __NR_faccessat 300 #elif defined __powerpc__ #define __NR_faccessat 298 - #elif defined __riscv - #define __NR_faccessat 48 #elif defined __sparc__ #define __NR_faccessat 296 #elif defined __ia64__ - #define __NR_faccessat (269 + 1024) + #define __NR_faccessat 269 #elif defined _MIPS_SIM #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ #define __NR_faccessat 4300 @@ -387,8 +367,6 @@ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ #define __NR_pidfd_send_signal 5424 #endif - #elif defined __ia64__ - #define __NR_pidfd_send_signal (424 + 1024) #else #define __NR_pidfd_send_signal 424 #endif @@ -407,12 +385,10 @@ #define __NR_seccomp 348 #elif defined __powerpc__ #define __NR_seccomp 358 - #elif defined __riscv - #define __NR_seccomp 277 #elif defined __sparc__ #define __NR_seccomp 346 #elif defined __ia64__ - #define __NR_seccomp (329 + 1024) + #define __NR_seccomp 329 #elif 
defined _MIPS_SIM #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ #define __NR_seccomp 4352 @@ -442,12 +418,10 @@ #define __NR_gettid 236 #elif defined __powerpc__ #define __NR_gettid 207 - #elif defined __riscv - #define __NR_gettid 178 #elif defined __sparc__ #define __NR_gettid 143 #elif defined __ia64__ - #define __NR_gettid (81 + 1024) + #define __NR_gettid 81 #elif defined _MIPS_SIM #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ #define __NR_gettid 4222 @@ -481,12 +455,10 @@ #define __NR_execveat 354 #elif defined __powerpc__ #define __NR_execveat 362 - #elif defined __riscv - #define __NR_execveat 281 #elif defined __sparc__ #define __NR_execveat 350 #elif defined __ia64__ - #define __NR_execveat (318 + 1024) + #define __NR_execveat 318 #elif defined _MIPS_SIM #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ #define __NR_execveat 4356 @@ -503,64 +475,4 @@ #endif #endif -#ifndef __NR_move_mount - #if defined __alpha__ - #define __NR_move_mount 539 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_move_mount 4429 - #endif - #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ - #define __NR_move_mount 6429 - #endif - #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ - #define __NR_move_mount 5429 - #endif - #elif defined __ia64__ - #define __NR_move_mount (428 + 1024) - #else - #define __NR_move_mount 429 - #endif -#endif - -#ifndef __NR_open_tree - #if defined __alpha__ - #define __NR_open_tree 538 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_open_tree 4428 - #endif - #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ - #define __NR_open_tree 6428 - #endif - #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ - #define __NR_open_tree 5428 - #endif - #elif defined __ia64__ - #define __NR_open_tree (428 + 1024) - #else - #define __NR_open_tree 428 - #endif -#endif - -#ifndef __NR_clone3 - #if defined __alpha__ - #define __NR_clone3 545 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_clone3 4435 
- #endif - #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ - #define __NR_clone3 6435 - #endif - #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ - #define __NR_clone3 5435 - #endif - #elif defined __ia64__ - #define __NR_clone3 (435 + 1024) - #else - #define __NR_clone3 435 - #endif -#endif - #endif /* __LXC_SYSCALL_NUMBERS_H */ diff --git a/src/lxc/syscall_wrappers.h b/src/lxc/syscall_wrappers.h index 041daf357..1cef21585 100644 --- a/src/lxc/syscall_wrappers.h +++ b/src/lxc/syscall_wrappers.h @@ -137,28 +137,4 @@ static int faccessat(int __fd, const char *__file, int __type, int __flag) } #endif -#ifndef HAVE_MOVE_MOUNT -static inline int move_mount_lxc(int from_dfd, const char *from_pathname, - int to_dfd, const char *to_pathname, - unsigned int flags) -{ - return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, - to_pathname, flags); -} -#define move_mount move_mount_lxc -#else -extern int move_mount(int from_dfd, const char *from_pathname, int to_dfd, - const char *to_pathname, unsigned int flags); -#endif - -#ifndef HAVE_OPEN_TREE -static inline int open_tree_lxc(int dfd, const char *filename, unsigned int flags) -{ - return syscall(__NR_open_tree, dfd, filename, flags); -} -#define open_tree open_tree_lxc -#else -extern int open_tree(int dfd, const char *filename, unsigned int flags); -#endif - #endif /* __LXC_SYSCALL_WRAPPER_H */ diff --git a/src/lxc/terminal.c b/src/lxc/terminal.c index e58db5c46..7441de791 100644 --- a/src/lxc/terminal.c +++ b/src/lxc/terminal.c @@ -28,6 +28,10 @@ #include "syscall_wrappers.h" #include "terminal.h" #include "utils.h" +#ifdef HAVE_ISULAD +#include "logger_json_file.h" +#include "include/strlcpy.h" +#endif #if HAVE_PTY_H #include @@ -65,7 +69,7 @@ void lxc_terminal_winsz(int srcfd, int dstfd) static void lxc_terminal_winch(struct lxc_terminal_state *ts) { - lxc_terminal_winsz(ts->stdinfd, ts->ptmxfd); + lxc_terminal_winsz(ts->stdinfd, ts->masterfd); } int lxc_terminal_signalfd_cb(int fd, uint32_t events, void *cbdata, @@ 
-105,7 +109,7 @@ struct lxc_terminal_state *lxc_terminal_signal_init(int srcfd, int dstfd) memset(ts, 0, sizeof(*ts)); ts->stdinfd = srcfd; - ts->ptmxfd = dstfd; + ts->masterfd = dstfd; ts->sigfd = -1; ret = sigemptyset(&mask); @@ -183,6 +187,69 @@ static int lxc_terminal_truncate_log_file(struct lxc_terminal *terminal) return lxc_unpriv(ftruncate(terminal->log_fd, 0)); } +#ifdef HAVE_ISULAD + +int lxc_set_terminal_winsz(struct lxc_terminal *terminal, unsigned int height, unsigned int width) +{ + int ret = 0; + struct winsize wsz; + + if (terminal->master < 0) { + return 0; + } + + ret = ioctl(terminal->master, TIOCGWINSZ, &wsz); + if (ret < 0) { + WARN("Failed to get window size"); + return -1; + } + wsz.ws_col = width; + wsz.ws_row = height; + + ret = ioctl(terminal->master, TIOCSWINSZ, &wsz); + if (ret < 0) + WARN("Failed to set window size"); + else + DEBUG("Set window size to %d columns and %d rows", wsz.ws_col, + wsz.ws_row); + return ret; +} + +/* + * isulad: support mult-logfiles + * */ +static int lxc_terminal_rename_old_log_file(struct lxc_terminal *terminal) +{ + int ret; + unsigned int i; + char tmp[PATH_MAX] = {0}; + char *rename_fname = NULL; + + for (i = terminal->log_rotate - 1; i > 1; i--) { + ret = snprintf(tmp, PATH_MAX, "%s.%u", terminal->log_path, i); + if (ret < 0 || ret >= PATH_MAX) { + free(rename_fname); + return -EFBIG; + } + free(rename_fname); + rename_fname = safe_strdup(tmp); + ret = snprintf(tmp, PATH_MAX, "%s.%u", terminal->log_path, (i - 1)); + if (ret < 0 || ret >= PATH_MAX) { + free(rename_fname); + return -EFBIG; + } + ret = lxc_unpriv(rename(tmp, rename_fname)); + if (ret < 0 && errno != ENOENT) { + free(rename_fname); + return ret; + } + } + + free(rename_fname); + return 0; +} +#endif + static int lxc_terminal_rotate_log_file(struct lxc_terminal *terminal) { __do_free char *tmp = NULL; @@ -196,6 +263,15 @@ static int lxc_terminal_rotate_log_file(struct lxc_terminal *terminal) if (terminal->log_fd < 0) return -EBADF; +#ifdef 
HAVE_ISULAD + /* isuald: rotate old log file first */ + ret = lxc_terminal_rename_old_log_file(terminal); + if(ret != 0) { + ERROR("Rename old log file failed"); + return ret; + } +#endif + len = strlen(terminal->log_path) + sizeof(".1"); tmp = must_realloc(NULL, len); @@ -212,6 +288,7 @@ static int lxc_terminal_rotate_log_file(struct lxc_terminal *terminal) return lxc_terminal_create_log_file(terminal); } +#ifndef HAVE_ISULAD static int lxc_terminal_write_log_file(struct lxc_terminal *terminal, char *buf, int bytes_read) { @@ -317,7 +394,456 @@ static int lxc_terminal_write_log_file(struct lxc_terminal *terminal, char *buf, bytes_read -= ret; return bytes_read; } +#endif + +#ifdef HAVE_ISULAD +/* get time buffer */ +static bool get_time_buffer(struct timespec *timestamp, char *timebuffer, + size_t maxsize) +{ + struct tm tm_utc = { 0 }; + int32_t nanos = 0; + time_t seconds; + size_t len = 0; + int ret = 0; + + if (!timebuffer || !maxsize) { + return false; + } + + seconds = (time_t)timestamp->tv_sec; + gmtime_r(&seconds, &tm_utc); + strftime(timebuffer, maxsize, "%Y-%m-%dT%H:%M:%S", &tm_utc); + + nanos = (int32_t)timestamp->tv_nsec; + len = strlen(timebuffer); + ret = snprintf(timebuffer + len, (maxsize - len), ".%09dZ", nanos); + if (ret < 0 || ret >= (maxsize - len)) { + return false; + } + + return true; +} + +/* get now time buffer */ +static bool get_now_time_buffer(char *timebuffer, size_t maxsize) +{ + int err = 0; + struct timespec ts; + + err = clock_gettime(CLOCK_REALTIME, &ts); + if (err != 0) { + ERROR("failed to get time"); + return false; + } + + return get_time_buffer(&ts, timebuffer, maxsize); +} + +static int isulad_lxc_terminal_rotate_write_data(struct lxc_terminal *terminal, const char *buf, + int bytes_read) +{ + int ret; + struct stat st; + int64_t space_left = -1; + + if (terminal->log_fd < 0) + return 0; + + /* A log size <= 0 means that there's no limit on the size of the log + * file at which point we simply ignore whether the log is 
supposed to + * be rotated or not. + */ + if (terminal->log_size <= 0) + return lxc_write_nointr(terminal->log_fd, buf, bytes_read); + + /* Get current size of the log file. */ + ret = fstat(terminal->log_fd, &st); + if (ret < 0) { + SYSERROR("Failed to stat the terminal log file descriptor"); + return -1; + } + + /* handle non-regular files */ + if ((st.st_mode & S_IFMT) != S_IFREG) { + /* This isn't a regular file. so rotating the file seems a + * dangerous thing to do, size limits are also very + * questionable. Let's not risk anything and tell the user that + * he's requesting us to do weird stuff. + */ + if (terminal->log_rotate > 0 || terminal->log_size > 0) + return -EINVAL; + + /* I mean, sure log wherever you want to. */ + return lxc_write_nointr(terminal->log_fd, buf, bytes_read); + } + + space_left = terminal->log_size - st.st_size; + + /* User doesn't want to rotate the log file and there's no more space + * left so simply truncate it. + */ + if (space_left <= 0 && terminal->log_rotate <= 0) { + ret = lxc_terminal_truncate_log_file(terminal); + if (ret < 0) + return ret; + + if (bytes_read <= terminal->log_size) + return lxc_write_nointr(terminal->log_fd, buf, bytes_read); + + /* Write as much as we can into the buffer and loose the rest. */ + return lxc_write_nointr(terminal->log_fd, buf, terminal->log_size); + } + + /* There's enough space left. */ + if (bytes_read <= space_left) + return lxc_write_nointr(terminal->log_fd, buf, bytes_read); + + /* There'd be more to write but we aren't instructed to rotate the log + * file so simply return. There's no error on our side here. + */ + if (terminal->log_rotate > 0) + ret = lxc_terminal_rotate_log_file(terminal); + else + ret = lxc_terminal_truncate_log_file(terminal); + if (ret < 0) + return ret; + + if (terminal->log_size < bytes_read) { + /* Well, this is unfortunate because it means that there is more + * to write than the user has granted us space. 
There are + * multiple ways to handle this but let's use the simplest one: + * write as much as we can, tell the user that there was more + * stuff to write and move on. + * Note that this scenario shouldn't actually happen with the + * standard pty-based terminal that LXC allocates since it will + * be switched into raw mode. In raw mode only 1 byte at a time + * should be read and written. + */ + WARN("Size of terminal log file is smaller than the bytes to write"); + ret = lxc_write_nointr(terminal->log_fd, buf, terminal->log_size); + if (ret < 0) + return -1; + bytes_read -= ret; + return bytes_read; + } + + /* Yay, we made it. */ + ret = lxc_write_nointr(terminal->log_fd, buf, bytes_read); + if (ret < 0) + return -1; + bytes_read -= ret; + return bytes_read; +} + +static ssize_t isulad_logger_json_write(struct lxc_terminal *terminal, const char *type, const char *buf, + int bytes_read) +{ + logger_json_file *msg = NULL; + ssize_t ret = -1; + size_t len; + char *json = NULL; + char timebuffer[64] = { 0 }; + parser_error err = NULL; + struct parser_context ctx = { GEN_OPTIONS_SIMPLIFY | GEN_OPTIONS_NOT_VALIDATE_UTF8, stderr }; + + if (bytes_read < 0 || bytes_read >= INT_MAX) { + return -1; + } + msg = calloc(sizeof(logger_json_file), 1); + if (msg == NULL) { + return -errno; + } + msg->log = calloc(bytes_read, 1); + if (!msg->log) { + goto cleanup; + } + memcpy(msg->log, buf, bytes_read); + msg->log_len = bytes_read; + msg->stream = type ? 
safe_strdup(type) : safe_strdup("stdout"); + + get_now_time_buffer(timebuffer, sizeof(timebuffer)); + msg->time = safe_strdup(timebuffer); + + json = logger_json_file_generate_json(msg, &ctx, &err); + if (!json) { + ERROR("Failed to generate json: %s", err); + goto cleanup; + } + len = strlen(json); + json[len] = '\n'; + ret = isulad_lxc_terminal_rotate_write_data(terminal, json, len + 1); +cleanup: + free(json); + free_logger_json_file(msg); + free(err); + return ret; +} + +static ssize_t isulad_logger_syslog_write(struct lxc_terminal *terminal, const char *buf) +{ + syslog(LOG_INFO, "%s", buf); + return 0; +} + +static inline bool is_syslog(const char *driver) +{ + if (driver == NULL) { + return false; + } + + return (strcmp("syslog", driver) == 0); +} + +static inline ssize_t isulad_logger_write(struct lxc_terminal *terminal, const char *type, const char *buf, + int bytes_read) +{ + if (is_syslog(terminal->log_driver)) { + return isulad_logger_syslog_write(terminal, buf); + } + + return isulad_logger_json_write(terminal, type, buf, bytes_read); +} + +static int isulad_lxc_terminal_write_log_file(struct lxc_terminal *terminal, const char *type, char *buf, + int bytes_read) +{ +#define __BUF_CACHE_SIZE (16 * LXC_TERMINAL_BUFFER_SIZE) + static char cache[__BUF_CACHE_SIZE]; + static int size = 0; + int upto, index; + int begin = 0, buf_readed = 0, buf_left = 0; + int ret; + + if (buf != NULL && bytes_read > 0) { + /* Work out how much more data we are okay with reading this time. */ + upto = size + bytes_read; + if (upto > __BUF_CACHE_SIZE) { + upto = __BUF_CACHE_SIZE; + } + + if (upto > size) { + buf_readed = upto - size; + memcpy(cache + size, buf, buf_readed); + buf_left = bytes_read - buf_readed; + size += buf_readed; + } + } + + // If we have no data to log, and there's no more coming, we're done. + if (size == 0) + return 0; + + // Break up the data that we've buffered up into lines, and log each in turn. 
+ for (index = 0; index < size; index++) { + if (cache[index] == '\n') { + ret = isulad_logger_write(terminal, type, cache + begin, index - begin + 1); + if (ret < 0) { + WARN("Failed to log msg"); + } + begin = index + 1; + } + } + /* If there's no more coming, or the buffer is full but + * has no newlines, log whatever we haven't logged yet, + * noting that it's a partial log line. */ + if (buf == NULL || (begin == 0 && size == __BUF_CACHE_SIZE)) { + if (begin < size) { + ret = isulad_logger_write(terminal, type, cache + begin, size - begin); + if (ret < 0) { + WARN("Failed to log msg"); + } + begin = 0; + size = 0; + } + if (buf == NULL) { + return 0; + } + } + /* Move any unlogged data to the front of the buffer in preparation for another read. */ + if (begin > 0) { + memcpy(cache, cache + begin, size - begin); + size -= begin; + } + /* Move left data to cache buffer */ + if (buf_left > 0) { + memcpy(cache + size, buf + buf_readed, buf_left); + size += buf_left; + } + return 0; +} + +/* isulad: forward data to all fifos */ +static void lxc_forward_data_to_fifo(struct lxc_list *list, bool is_err, const char *buf, int r) +{ + struct lxc_list *it = NULL; + struct lxc_list *next = NULL; + struct lxc_fifos_fd *elem = NULL; + + lxc_list_for_each_safe(it, list, next) { + elem = it->elem; + if (is_err) { + if (elem->err_fd >= 0) + lxc_write_nointr(elem->err_fd, buf, r); + } else { + if (elem->out_fd >= 0) + lxc_write_nointr(elem->out_fd, buf, r); + } + } + + return; +} + +/* isulad: judge the fd whether is fifo */ +static bool lxc_terminal_is_fifo(int fd, struct lxc_list *list) +{ + struct lxc_list *it = NULL; + struct lxc_list *next = NULL; + struct lxc_fifos_fd *elem = NULL; + + lxc_list_for_each_safe(it, list, next) { + elem = it->elem; + if (elem->in_fd == fd) + return true; + } + + return false; +} + +/* isulad: if fd == -1, means delete all the fifos*/ +int lxc_terminal_delete_fifo(int fd, struct lxc_list *list) +{ + struct lxc_list *it = NULL; + struct lxc_list 
*next = NULL; + struct lxc_fifos_fd *elem = NULL; + + lxc_list_for_each_safe(it, list, next) { + elem = it->elem; + if (elem->in_fd == fd || -1 == fd) { + INFO("Delete fifo fd %d", fd); + lxc_list_del(it); + if (elem->in_fifo) + free(elem->in_fifo); + if (elem->out_fifo) + free(elem->out_fifo); + if (elem->err_fifo) + free(elem->err_fifo); + if (elem->in_fd >= 0) + close(elem->in_fd); + if (elem->out_fd >= 0) + close(elem->out_fd); + if (elem->err_fd >= 0) + close(elem->err_fd); + free(elem); + } + } + + return 0; +} + +int lxc_terminal_io_cb(int fd, uint32_t events, void *data, + struct lxc_epoll_descr *descr) +{ + struct lxc_terminal *terminal = data; + char buf[2 * LXC_TERMINAL_BUFFER_SIZE]; + int r, w, w_log, w_rbuf; + + w = r = lxc_read_nointr(fd, buf, sizeof(buf)); + if (r <= 0) { + INFO("Terminal client on fd %d has exited", fd); + lxc_mainloop_del_handler(descr, fd); + + if (fd == terminal->master) { + terminal->master = -EBADF; + /* write remained buffer to terminal log */ + if (terminal->log_fd >= 0) { + w_log = isulad_lxc_terminal_write_log_file(terminal, "stdout", NULL, 0); + if (w_log < 0) + TRACE("Failed to write %d bytes to terminal log", r); + } + /* notes: do not close the master fd due to if we close the fd, the process may + * recive SIGHUP and the exit code will be 129 (128 + 1) + */ + return LXC_MAINLOOP_CLOSE; + } else if (fd == terminal->peer) { + lxc_terminal_signal_fini(terminal); + terminal->peer = -EBADF; + close(fd); + return LXC_MAINLOOP_CONTINUE; /* isulad: do not close mainloop when peer close*/ + } else if (lxc_terminal_is_fifo(fd, &terminal->fifos)) { + /* isulad: delete fifos when the client close */ + lxc_terminal_delete_fifo(fd, &terminal->fifos); + return LXC_MAINLOOP_CONTINUE; + } else if (fd == terminal->pipes[1][0] || fd == terminal->pipes[2][0]) { + if (fd == terminal->pipes[1][0]) { + if (terminal->log_fd >= 0) { + w_log = isulad_lxc_terminal_write_log_file(terminal, "stdout", NULL, 0); + } + terminal->pipes[1][0] = -EBADF; 
+ } else if (fd == terminal->pipes[2][0]) { + if (terminal->log_fd >= 0) { + w_log = isulad_lxc_terminal_write_log_file(terminal, "stderr", NULL, 0); + } + terminal->pipes[2][0] = -EBADF; + } + /* notes: do not close the master fd due to if we close the fd, the process may + * recive SIGHUP and the exit code will be 141 (128 + 13) + */ + return LXC_MAINLOOP_CONTINUE; + } else if (fd == terminal->pipes[0][1]) { + TRACE("closed stdin pipe of container stdin"); + terminal->pipes[0][1] = -EBADF; + return LXC_MAINLOOP_CONTINUE; + } else { + ERROR("Handler received unexpected file descriptor"); + } + close(fd); + return LXC_MAINLOOP_CLOSE; + } + + if (fd == terminal->peer || lxc_terminal_is_fifo(fd, &terminal->fifos)) { + if (terminal->master > 0) + w = lxc_write_nointr(terminal->master, buf, r); + if (terminal->pipes[0][1] > 0) + w = lxc_write_nointr(terminal->pipes[0][1], buf, r); + } + + w_rbuf = w_log = 0; + if (fd == terminal->master || fd == terminal->pipes[1][0] || fd == terminal->pipes[2][0]) { + /* write to peer first */ + if (terminal->peer >= 0) + w = lxc_write_nointr(terminal->peer, buf, r); + + /* isulad: forward data to fifos */ + lxc_forward_data_to_fifo(&terminal->fifos, fd == terminal->pipes[2][0], buf, r); + + /* write to terminal ringbuffer */ + if (terminal->buffer_size > 0) + w_rbuf = lxc_ringbuf_write(&terminal->ringbuf, buf, r); + + /* write to terminal log */ + if (terminal->log_fd >= 0) { + if (fd == terminal->master || fd == terminal->pipes[1][0]) + w_log = isulad_lxc_terminal_write_log_file(terminal, "stdout", buf, r); + else if (fd == terminal->pipes[2][0]) + w_log = isulad_lxc_terminal_write_log_file(terminal, "stderr", buf, r); + } + } + + if (w != r) + WARN("Short write on terminal r:%d != w:%d", r, w); + + if (w_rbuf < 0) { + errno = -w_rbuf; + SYSTRACE("Failed to write %d bytes to terminal ringbuffer", r); + } + if (w_log < 0) + TRACE("Failed to write %d bytes to terminal log", r); + + return LXC_MAINLOOP_CONTINUE; +} +#else int 
lxc_terminal_io_cb(int fd, uint32_t events, void *data, struct lxc_epoll_descr *descr) { @@ -330,8 +856,8 @@ int lxc_terminal_io_cb(int fd, uint32_t events, void *data, INFO("Terminal client on fd %d has exited", fd); lxc_mainloop_del_handler(descr, fd); - if (fd == terminal->ptmx) { - terminal->ptmx = -EBADF; + if (fd == terminal->master) { + terminal->master = -EBADF; } else if (fd == terminal->peer) { lxc_terminal_signal_fini(terminal); terminal->peer = -EBADF; @@ -344,10 +870,10 @@ int lxc_terminal_io_cb(int fd, uint32_t events, void *data, } if (fd == terminal->peer) - w = lxc_write_nointr(terminal->ptmx, buf, r); + w = lxc_write_nointr(terminal->master, buf, r); w_rbuf = w_log = 0; - if (fd == terminal->ptmx) { + if (fd == terminal->master) { /* write to peer first */ if (terminal->peer >= 0) w = lxc_write_nointr(terminal->peer, buf, r); @@ -374,6 +900,7 @@ int lxc_terminal_io_cb(int fd, uint32_t events, void *data, return LXC_MAINLOOP_CONTINUE; } +#endif static int lxc_terminal_mainloop_add_peer(struct lxc_terminal *terminal) { @@ -401,21 +928,125 @@ static int lxc_terminal_mainloop_add_peer(struct lxc_terminal *terminal) return 0; } +#ifdef HAVE_ISULAD +/* isulad add pipes to mainloop */ +static int lxc_terminal_mainloop_add_pipes(struct lxc_terminal *terminal) +{ + int ret = 0; + + // parent read data from fifo, and send to stdin of container + if (terminal->pipes[0][1] > 0) { + ret = lxc_mainloop_add_handler(terminal->descr, terminal->pipes[0][1], + lxc_terminal_io_cb, terminal); + if (ret) { + ERROR("pipe fd %d not added to mainloop", terminal->pipes[0][1]); + return -1; + } + } + // parent read data from stdout of container, and send to fifo + if (terminal->pipes[1][0] > 0) { + ret = lxc_mainloop_add_handler(terminal->descr, terminal->pipes[1][0], + lxc_terminal_io_cb, terminal); + if (ret) { + ERROR("pipe fd %d not added to mainloop", terminal->pipes[1][0]); + return -1; + } + } + // parent read data from stderr of container, and send to fifo + if 
(terminal->pipes[2][0] > 0) { + ret = lxc_mainloop_add_handler(terminal->descr, terminal->pipes[2][0], + lxc_terminal_io_cb, terminal); + if (ret) { + ERROR("pipe fd %d not added to mainloop", terminal->pipes[2][0]); + return -1; + } + } + return ret; +} + +/* isulad add fifo to mainloop */ +static int lxc_terminal_mainloop_add_fifo(struct lxc_terminal *terminal) +{ + int ret = 0; + struct lxc_list *it = NULL; + struct lxc_list *next = NULL; + struct lxc_fifos_fd *elem = NULL; + + lxc_list_for_each_safe(it, &terminal->fifos, next) { + elem = it->elem; + if (elem->in_fd >= 0) { + ret = lxc_mainloop_add_handler(terminal->descr, elem->in_fd, + lxc_terminal_io_cb, terminal); + if (ret) { + ERROR("console fifo %s not added to mainloop", elem->in_fifo); + return -1; + } + } + } + return ret; +} + +int lxc_terminal_mainloop_add(struct lxc_epoll_descr *descr, + struct lxc_terminal *terminal) +{ + int ret; + + /* We cache the descr so that we can add an fd to it when someone + * does attach to it in lxc_terminal_allocate(). 
+ */ + terminal->descr = descr; + + ret = lxc_terminal_mainloop_add_peer(terminal); + if (ret < 0) { + ERROR("Failed to add handler for terminal peer to mainloop"); + return -1; + } + + /* isulad add pipes to mainloop */ + ret = lxc_terminal_mainloop_add_pipes(terminal); + if (ret < 0) { + ERROR("Failed to add handler for terminal fifos to mainloop"); + return -1; + } + + /* isulad add fifo to mainloop */ + ret = lxc_terminal_mainloop_add_fifo(terminal); + if (ret < 0) { + ERROR("Failed to add handler for terminal fifos to mainloop"); + return -1; + } + + if (terminal->master < 0) { + INFO("Terminal is not initialized"); + return 0; + } + + ret = lxc_mainloop_add_handler(descr, terminal->master, + lxc_terminal_io_cb, terminal); + if (ret < 0) { + ERROR("Failed to add handler for terminal master fd %d to " + "mainloop", terminal->master); + return -1; + } + + return 0; +} +#else int lxc_terminal_mainloop_add(struct lxc_epoll_descr *descr, struct lxc_terminal *terminal) { int ret; - if (terminal->ptmx < 0) { + if (terminal->master < 0) { INFO("Terminal is not initialized"); return 0; } - ret = lxc_mainloop_add_handler(descr, terminal->ptmx, + ret = lxc_mainloop_add_handler(descr, terminal->master, lxc_terminal_io_cb, terminal); if (ret < 0) { - ERROR("Failed to add handler for terminal ptmx fd %d to " - "mainloop", terminal->ptmx); + ERROR("Failed to add handler for terminal master fd %d to " + "mainloop", terminal->master); return -1; } @@ -426,6 +1057,7 @@ int lxc_terminal_mainloop_add(struct lxc_epoll_descr *descr, return lxc_terminal_mainloop_add_peer(terminal); } +#endif int lxc_setup_tios(int fd, struct termios *oldtios) { @@ -483,11 +1115,11 @@ static void lxc_terminal_peer_proxy_free(struct lxc_terminal *terminal) { lxc_terminal_signal_fini(terminal); - close(terminal->proxy.ptmx); - terminal->proxy.ptmx = -1; + close(terminal->proxy.master); + terminal->proxy.master = -1; - close(terminal->proxy.pts); - terminal->proxy.pts = -1; + 
close(terminal->proxy.slave); + terminal->proxy.slave = -1; terminal->proxy.busy = -1; @@ -503,7 +1135,7 @@ static int lxc_terminal_peer_proxy_alloc(struct lxc_terminal *terminal, struct termios oldtermio; struct lxc_terminal_state *ts; - if (terminal->ptmx < 0) { + if (terminal->master < 0) { ERROR("Terminal not set up"); return -1; } @@ -519,51 +1151,51 @@ static int lxc_terminal_peer_proxy_alloc(struct lxc_terminal *terminal, } /* This is the proxy terminal that will be given to the client, and - * that the real terminal ptmx will send to / recv from. + * that the real terminal master will send to / recv from. */ - ret = openpty(&terminal->proxy.ptmx, &terminal->proxy.pts, NULL, + ret = openpty(&terminal->proxy.master, &terminal->proxy.slave, NULL, NULL, NULL); if (ret < 0) { SYSERROR("Failed to open proxy terminal"); return -1; } - ret = ttyname_r(terminal->proxy.pts, terminal->proxy.name, + ret = ttyname_r(terminal->proxy.slave, terminal->proxy.name, sizeof(terminal->proxy.name)); if (ret < 0) { - SYSERROR("Failed to retrieve name of proxy terminal pts"); + SYSERROR("Failed to retrieve name of proxy terminal slave"); goto on_error; } - ret = fd_cloexec(terminal->proxy.ptmx, true); + ret = fd_cloexec(terminal->proxy.master, true); if (ret < 0) { - SYSERROR("Failed to set FD_CLOEXEC flag on proxy terminal ptmx"); + SYSERROR("Failed to set FD_CLOEXEC flag on proxy terminal master"); goto on_error; } - ret = fd_cloexec(terminal->proxy.pts, true); + ret = fd_cloexec(terminal->proxy.slave, true); if (ret < 0) { - SYSERROR("Failed to set FD_CLOEXEC flag on proxy terminal pts"); + SYSERROR("Failed to set FD_CLOEXEC flag on proxy terminal slave"); goto on_error; } - ret = lxc_setup_tios(terminal->proxy.pts, &oldtermio); + ret = lxc_setup_tios(terminal->proxy.slave, &oldtermio); if (ret < 0) goto on_error; - ts = lxc_terminal_signal_init(terminal->proxy.ptmx, terminal->ptmx); + ts = lxc_terminal_signal_init(terminal->proxy.master, terminal->master); if (!ts) goto 
on_error; terminal->tty_state = ts; - terminal->peer = terminal->proxy.pts; + terminal->peer = terminal->proxy.slave; terminal->proxy.busy = sockfd; ret = lxc_terminal_mainloop_add_peer(terminal); if (ret < 0) goto on_error; - NOTICE("Opened proxy terminal with ptmx fd %d and pts fd %d", - terminal->proxy.ptmx, terminal->proxy.pts); + NOTICE("Opened proxy terminal with master fd %d and slave fd %d", + terminal->proxy.master, terminal->proxy.slave); return 0; on_error: @@ -574,7 +1206,7 @@ on_error: int lxc_terminal_allocate(struct lxc_conf *conf, int sockfd, int *ttyreq) { int ttynum; - int ptmxfd = -1; + int masterfd = -1; struct lxc_tty_info *ttys = &conf->ttys; struct lxc_terminal *terminal = &conf->console; @@ -585,7 +1217,7 @@ int lxc_terminal_allocate(struct lxc_conf *conf, int sockfd, int *ttyreq) if (ret < 0) goto out; - ptmxfd = terminal->proxy.ptmx; + masterfd = terminal->proxy.master; goto out; } @@ -614,10 +1246,10 @@ int lxc_terminal_allocate(struct lxc_conf *conf, int sockfd, int *ttyreq) out_tty: ttys->tty[ttynum - 1].busy = sockfd; - ptmxfd = ttys->tty[ttynum - 1].ptmx; + masterfd = ttys->tty[ttynum - 1].master; out: - return ptmxfd; + return masterfd; } void lxc_terminal_free(struct lxc_conf *conf, int fd) @@ -633,20 +1265,37 @@ void lxc_terminal_free(struct lxc_conf *conf, int fd) if (terminal->proxy.busy != fd) return; - lxc_mainloop_del_handler(terminal->descr, terminal->proxy.pts); + lxc_mainloop_del_handler(terminal->descr, terminal->proxy.slave); lxc_terminal_peer_proxy_free(terminal); } static int lxc_terminal_peer_default(struct lxc_terminal *terminal) { - struct lxc_terminal_state *ts; - const char *path; + struct lxc_terminal_state *ts = NULL; + const char *path = NULL; int ret = 0; if (terminal->path) path = terminal->path; - else - path = "/dev/tty"; + +#ifdef HAVE_ISULAD + /* isulad: if no console was given, try current controlling terminal, there + * won't be one if we were started as a daemon (-d) + */ + if (!path && 
!access("/dev/tty", F_OK)) { + int fd; + fd = open("/dev/tty", O_RDWR); + if (fd >= 0) { + close(fd); + path = "/dev/tty"; + } + } + + if (!path) { + DEBUG("Not have a controlling terminal"); + return 0; + } +#endif terminal->peer = lxc_unpriv(open(path, O_RDWR | O_CLOEXEC)); if (terminal->peer < 0) { @@ -666,14 +1315,14 @@ static int lxc_terminal_peer_default(struct lxc_terminal *terminal) goto on_error_free_tios; } - ts = lxc_terminal_signal_init(terminal->peer, terminal->ptmx); + ts = lxc_terminal_signal_init(terminal->peer, terminal->master); terminal->tty_state = ts; if (!ts) { WARN("Failed to install signal handler"); goto on_error_free_tios; } - lxc_terminal_winsz(terminal->peer, terminal->ptmx); + lxc_terminal_winsz(terminal->peer, terminal->master); terminal->tios = malloc(sizeof(*terminal->tios)); if (!terminal->tios) @@ -749,22 +1398,51 @@ void lxc_terminal_delete(struct lxc_terminal *terminal) close(terminal->peer); terminal->peer = -1; - if (terminal->ptmx >= 0) - close(terminal->ptmx); - terminal->ptmx = -1; + if (terminal->master >= 0) + close(terminal->master); + terminal->master = -1; - if (terminal->pts >= 0) - close(terminal->pts); - terminal->pts = -1; + if (terminal->slave >= 0) + close(terminal->slave); + terminal->slave = -1; if (terminal->log_fd >= 0) close(terminal->log_fd); terminal->log_fd = -1; + +#ifdef HAVE_ISULAD + if (is_syslog(terminal->log_driver)) { + closelog(); + free(terminal->log_driver); + } + /* isulad: close all pipes */ + if (terminal->pipes[0][0] >= 0) + close(terminal->pipes[0][0]); + terminal->pipes[0][0] = -1; + if (terminal->pipes[0][1] >= 0) + close(terminal->pipes[0][1]); + terminal->pipes[0][1] = -1; + if (terminal->pipes[1][0] >= 0) + close(terminal->pipes[1][0]); + terminal->pipes[1][0] = -1; + if (terminal->pipes[1][1] >= 0) + close(terminal->pipes[1][1]); + terminal->pipes[1][1] = -1; + if (terminal->pipes[2][0] >= 0) + close(terminal->pipes[2][0]); + terminal->pipes[2][0] = -1; + if (terminal->pipes[2][1] >= 
0) + close(terminal->pipes[2][1]); + terminal->pipes[2][1] = -1; + + /* isulad: delete all fifos */ + lxc_terminal_delete_fifo(-1, &terminal->fifos); +#endif } /** * Note that this function needs to run before the mainloop starts. Since we - * register a handler for the terminal's ptmxfd when we create the mainloop + * register a handler for the terminal's masterfd when we create the mainloop * the terminal handler needs to see an allocated ringbuffer. */ static int lxc_terminal_create_ringbuf(struct lxc_terminal *terminal) @@ -828,31 +1506,275 @@ int lxc_terminal_create_log_file(struct lxc_terminal *terminal) return 0; } +#ifdef HAVE_ISULAD +/* isulad: fd_nonblock */ +static int fd_nonblock(int fd) +{ + int flags; + + flags = fcntl(fd, F_GETFL); + + return fcntl(fd, F_SETFL, (int)((unsigned int)flags | O_NONBLOCK)); +} + +static int terminal_fifo_open(const char *fifo_path, int flags) +{ + int fd = -1; + + fd = lxc_open(fifo_path, flags, 0); + if (fd < 0) { + WARN("Failed to open fifo %s to send message: %s.", fifo_path, + strerror(errno)); + return -1; + } + + return fd; +} + +bool fifo_exists(const char *path) +{ + struct stat sb; + int ret; + + ret = stat(path, &sb); + if (ret < 0) + // could be something other than eexist, just say no + return false; + return S_ISFIFO(sb.st_mode); +} + +/* isulad: set terminal fifos */ +static int lxc_terminal_set_fifo(struct lxc_terminal *console, const char *in, const char *out, const char *err, int *input_fd) +{ + int fifofd_in = -1, fifofd_out = -1, fifofd_err = -1; + struct lxc_fifos_fd *fifo_elem = NULL; + + if ((in && !fifo_exists(in)) || (out && !fifo_exists(out)) || (err && !fifo_exists(err))) { + ERROR("File %s or %s or %s does not refer to a FIFO", in, out, err); + return -1; + } + + if (in) { + fifofd_in = terminal_fifo_open(in, O_RDONLY | O_NONBLOCK | O_CLOEXEC); + if (fifofd_in < 0) { + SYSERROR("Failed to open FIFO: %s", in); + return -1; + } + } + + if (out) { + fifofd_out = terminal_fifo_open(out, O_WRONLY | 
O_NONBLOCK | O_CLOEXEC); + if (fifofd_out < 0) { + SYSERROR("Failed to open FIFO: %s", out); + if (fifofd_in >= 0) + close(fifofd_in); + return -1; + } + } + + if (err) { + fifofd_err = terminal_fifo_open(err, O_WRONLY | O_NONBLOCK | O_CLOEXEC); + if (fifofd_err < 0) { + SYSERROR("Failed to open FIFO: %s", err); + if (fifofd_in >= 0) + close(fifofd_in); + if (fifofd_out >= 0) + close(fifofd_out); + return -1; + } + } + + fifo_elem = malloc(sizeof(*fifo_elem)); + if (fifo_elem == NULL) { + if (fifofd_in >= 0) + close(fifofd_in); + if (fifofd_out >= 0) + close(fifofd_out); + if (fifofd_err >= 0) + close(fifofd_err); + return -1; + } + memset(fifo_elem, 0, sizeof(*fifo_elem)); + + fifo_elem->in_fifo = safe_strdup(in ? in : ""); + fifo_elem->out_fifo = safe_strdup(out ? out : ""); + fifo_elem->err_fifo = safe_strdup(err ? err : ""); + fifo_elem->in_fd = fifofd_in; + fifo_elem->out_fd = fifofd_out; + fifo_elem->err_fd = fifofd_err; + lxc_list_add_elem(&fifo_elem->node, fifo_elem); + lxc_list_add_tail(&console->fifos, &fifo_elem->node); + + if (input_fd) + *input_fd = fifofd_in; + + return 0; +} + +/* isulad: add default fifos */ +static int lxc_terminal_fifo_default(struct lxc_terminal *terminal) +{ + if (terminal->init_fifo[0] || terminal->init_fifo[1] || terminal->init_fifo[2]) + return lxc_terminal_set_fifo(terminal, terminal->init_fifo[0], terminal->init_fifo[1], terminal->init_fifo[2], NULL); + return 0; +} + int lxc_terminal_create(struct lxc_terminal *terminal) { int ret; - ret = openpty(&terminal->ptmx, &terminal->pts, NULL, NULL, NULL); + if (!terminal->disable_pty) { + ret = openpty(&terminal->master, &terminal->slave, NULL, NULL, NULL); + if (ret < 0) { + SYSERROR("Failed to open terminal"); + return -1; + } + + ret = ttyname_r(terminal->slave, terminal->name, sizeof(terminal->name)); + if (ret < 0) { + SYSERROR("Failed to retrieve name of terminal slave"); + goto err; + } + + ret = fd_cloexec(terminal->master, true); + if (ret < 0) { + SYSERROR("Failed to 
set FD_CLOEXEC flag on terminal master"); + goto err; + } + + /* isulad: make master NONBLOCK */ + ret = fd_nonblock(terminal->master); + if (ret < 0) { + SYSERROR("Failed to set O_NONBLOCK flag on terminal master"); + goto err; + } + + ret = fd_cloexec(terminal->slave, true); + if (ret < 0) { + SYSERROR("Failed to set FD_CLOEXEC flag on terminal slave"); + goto err; + } + + ret = lxc_terminal_peer_default(terminal); + if (ret < 0) { + ERROR("Failed to allocate proxy terminal"); + goto err; + } + } else { + /* isulad: create 3 pipes */ + /* for stdin */ + if (pipe2(terminal->pipes[0], O_CLOEXEC)) { + ERROR("Failed to create stdin pipe"); + goto err; + } + /* for stdout */ + if (pipe2(terminal->pipes[1], O_CLOEXEC)) { + ERROR("Failed to create stdout pipe"); + goto err; + } + /* for stderr */ + if (pipe2(terminal->pipes[2], O_CLOEXEC)) { + ERROR("Failed to create stderr pipe"); + goto err; + } + } + + /* isulad: open fifos */ + ret = lxc_terminal_fifo_default(terminal); + if (ret < 0) { + ERROR("Failed to allocate fifo terminal"); + goto err; + } + + return 0; + +err: + lxc_terminal_delete(terminal); + return -ENODEV; +} + +/* isulad: add fifos dynamic*/ +int lxc_terminal_add_fifos(struct lxc_conf *conf, const char *fifonames) +{ + int ret = 0; + struct lxc_terminal *terminal = &conf->console; + int fifofd_in = -1; + char *tmp = NULL, *saveptr = NULL, *in = NULL, *out = NULL, *err = NULL; + const char *none_fifo_name = "none"; + + tmp = safe_strdup(fifonames); + + in = strtok_r(tmp, "&&&&", &saveptr); + if (!in) { + ret = -1; + goto free_out; + } + if (strcmp(in, none_fifo_name) == 0) + in = NULL; + + out = strtok_r(NULL, "&&&&", &saveptr); + if (!out) { + ret = -1; + goto free_out; + } + if (strcmp(out, none_fifo_name) == 0) + out = NULL; + + err = strtok_r(NULL, "&&&&", &saveptr); + if (!err) { + ret = -1; + goto free_out; + } + if (strcmp(err, none_fifo_name) == 0) + err = NULL; + + ret = lxc_terminal_set_fifo(terminal, in, out, err, &fifofd_in); + if (ret < 0) { 
+ ERROR("Faild to set fifos to console config"); + ret = -1; + goto free_out; + } + + if (lxc_mainloop_add_handler(terminal->descr, fifofd_in, + lxc_terminal_io_cb, terminal)) { + ERROR("console fifo not added to mainloop"); + lxc_terminal_delete_fifo(fifofd_in, &terminal->fifos); + ret = -1; + goto free_out; + } + +free_out: + if (tmp) + free(tmp); + return ret; +} + +#else +int lxc_terminal_create(struct lxc_terminal *terminal) +{ + int ret; + + ret = openpty(&terminal->master, &terminal->slave, NULL, NULL, NULL); if (ret < 0) { SYSERROR("Failed to open terminal"); return -1; } - ret = ttyname_r(terminal->pts, terminal->name, sizeof(terminal->name)); + ret = ttyname_r(terminal->slave, terminal->name, sizeof(terminal->name)); if (ret < 0) { - SYSERROR("Failed to retrieve name of terminal pts"); + SYSERROR("Failed to retrieve name of terminal slave"); goto err; } - ret = fd_cloexec(terminal->ptmx, true); + ret = fd_cloexec(terminal->master, true); if (ret < 0) { - SYSERROR("Failed to set FD_CLOEXEC flag on terminal ptmx"); + SYSERROR("Failed to set FD_CLOEXEC flag on terminal master"); goto err; } - ret = fd_cloexec(terminal->pts, true); + ret = fd_cloexec(terminal->slave, true); if (ret < 0) { - SYSERROR("Failed to set FD_CLOEXEC flag on terminal pts"); + SYSERROR("Failed to set FD_CLOEXEC flag on terminal slave"); goto err; } @@ -868,6 +1790,7 @@ err: lxc_terminal_delete(terminal); return -ENODEV; } +#endif int lxc_terminal_setup(struct lxc_conf *conf) { @@ -883,6 +1806,18 @@ int lxc_terminal_setup(struct lxc_conf *conf) if (ret < 0) return -1; +#ifdef HAVE_ISULAD + if (is_syslog(terminal->log_driver)) { + if (terminal->log_syslog_tag == NULL) { + terminal->log_syslog_tag = malloc(16 * sizeof(char)); + (void)strlcpy(terminal->log_syslog_tag, conf->name, 16); + } + if (terminal->log_syslog_facility <= 0) { + terminal->log_syslog_facility = LOG_DAEMON; + } + openlog(terminal->log_syslog_tag, LOG_PID, terminal->log_syslog_facility); + } +#endif ret = 
lxc_terminal_create_log_file(terminal); if (ret < 0) goto err; @@ -956,21 +1891,21 @@ int lxc_terminal_stdin_cb(int fd, uint32_t events, void *cbdata, ts->saw_escape = 0; } - ret = lxc_write_nointr(ts->ptmxfd, &c, 1); + ret = lxc_write_nointr(ts->masterfd, &c, 1); if (ret <= 0) return LXC_MAINLOOP_CLOSE; return LXC_MAINLOOP_CONTINUE; } -int lxc_terminal_ptmx_cb(int fd, uint32_t events, void *cbdata, +int lxc_terminal_master_cb(int fd, uint32_t events, void *cbdata, struct lxc_epoll_descr *descr) { int r, w; char buf[LXC_TERMINAL_BUFFER_SIZE]; struct lxc_terminal_state *ts = cbdata; - if (fd != ts->ptmxfd) + if (fd != ts->masterfd) return LXC_MAINLOOP_CLOSE; r = lxc_read_nointr(fd, buf, sizeof(buf)); @@ -984,16 +1919,16 @@ int lxc_terminal_ptmx_cb(int fd, uint32_t events, void *cbdata, return LXC_MAINLOOP_CONTINUE; } -int lxc_terminal_getfd(struct lxc_container *c, int *ttynum, int *ptmxfd) +int lxc_terminal_getfd(struct lxc_container *c, int *ttynum, int *masterfd) { - return lxc_cmd_console(c->name, ttynum, ptmxfd, c->config_path); + return lxc_cmd_console(c->name, ttynum, masterfd, c->config_path); } int lxc_console(struct lxc_container *c, int ttynum, int stdinfd, int stdoutfd, int stderrfd, int escape) { - int ptmxfd, ret, ttyfd; + int masterfd, ret, ttyfd; struct lxc_epoll_descr descr; struct termios oldtios; struct lxc_terminal_state *ts; @@ -1002,7 +1937,7 @@ int lxc_console(struct lxc_container *c, int ttynum, }; int istty = 0; - ttyfd = lxc_cmd_console(c->name, &ttynum, &ptmxfd, c->config_path); + ttyfd = lxc_cmd_console(c->name, &ttynum, &masterfd, c->config_path); if (ttyfd < 0) return -1; @@ -1010,7 +1945,7 @@ int lxc_console(struct lxc_container *c, int ttynum, if (ret < 0) TRACE("Process is already group leader"); - ts = lxc_terminal_signal_init(stdinfd, ptmxfd); + ts = lxc_terminal_signal_init(stdinfd, masterfd); if (!ts) { ret = -1; goto close_fds; @@ -1021,8 +1956,8 @@ int lxc_console(struct lxc_container *c, int ttynum, istty = isatty(stdinfd); if 
(istty) { - lxc_terminal_winsz(stdinfd, ptmxfd); - lxc_terminal_winsz(ts->stdinfd, ts->ptmxfd); + lxc_terminal_winsz(stdinfd, masterfd); + lxc_terminal_winsz(ts->stdinfd, ts->masterfd); } else { INFO("File descriptor %d does not refer to a terminal", stdinfd); } @@ -1049,10 +1984,10 @@ int lxc_console(struct lxc_container *c, int ttynum, goto close_mainloop; } - ret = lxc_mainloop_add_handler(&descr, ts->ptmxfd, - lxc_terminal_ptmx_cb, ts); + ret = lxc_mainloop_add_handler(&descr, ts->masterfd, + lxc_terminal_master_cb, ts); if (ret < 0) { - ERROR("Failed to add ptmx handler"); + ERROR("Failed to add master handler"); goto close_mainloop; } @@ -1093,7 +2028,7 @@ sigwinch_fini: lxc_terminal_signal_fini(&terminal); close_fds: - close(ptmxfd); + close(masterfd); close(ttyfd); return ret; @@ -1120,9 +2055,15 @@ int lxc_terminal_prepare_login(int fd) if (ret < 0) return -1; +#ifdef HAVE_ISULAD + ret = set_stdfds(fd); + if (ret < 0) + return -1; +#else ret = lxc_terminal_set_stdfds(fd); if (ret < 0) return -1; +#endif if (fd > STDERR_FILENO) close(fd); @@ -1133,19 +2074,31 @@ int lxc_terminal_prepare_login(int fd) void lxc_terminal_info_init(struct lxc_terminal_info *terminal) { terminal->name[0] = '\0'; - terminal->ptmx = -EBADF; - terminal->pts = -EBADF; + terminal->master = -EBADF; + terminal->slave = -EBADF; terminal->busy = -1; } void lxc_terminal_init(struct lxc_terminal *terminal) { memset(terminal, 0, sizeof(*terminal)); - terminal->pts = -EBADF; - terminal->ptmx = -EBADF; + terminal->slave = -EBADF; + terminal->master = -EBADF; terminal->peer = -EBADF; terminal->log_fd = -EBADF; lxc_terminal_info_init(&terminal->proxy); +#ifdef HAVE_ISULAD + terminal->init_fifo[0] = NULL; + terminal->init_fifo[1] = NULL; + terminal->init_fifo[2] = NULL; + terminal->pipes[0][0] = -1; + terminal->pipes[0][1] = -1; + terminal->pipes[1][0] = -1; + terminal->pipes[1][1] = -1; + terminal->pipes[2][0] = -1; + terminal->pipes[2][1] = -1; + lxc_list_init(&terminal->fifos); +#endif } void 
lxc_terminal_conf_free(struct lxc_terminal *terminal) @@ -1155,6 +2108,15 @@ void lxc_terminal_conf_free(struct lxc_terminal *terminal) if (terminal->buffer_size > 0 && terminal->ringbuf.addr) lxc_ringbuf_release(&terminal->ringbuf); lxc_terminal_signal_fini(terminal); +#ifdef HAVE_ISULAD + /*isulad: free console fifos */ + free(terminal->init_fifo[0]); + free(terminal->init_fifo[1]); + free(terminal->init_fifo[2]); + lxc_terminal_delete_fifo(-1, &terminal->fifos); + free(terminal->log_driver); + free(terminal->log_syslog_tag); +#endif } int lxc_terminal_map_ids(struct lxc_conf *c, struct lxc_terminal *terminal) @@ -1167,13 +2129,14 @@ int lxc_terminal_map_ids(struct lxc_conf *c, struct lxc_terminal *terminal) if (strcmp(terminal->name, "") == 0) return 0; - ret = userns_exec_mapped_root(terminal->name, terminal->pts, c); + ret = chown_mapped_root(terminal->name, c); if (ret < 0) { - return log_error(-1, "Failed to chown terminal %d(%s)", - terminal->pts, terminal->name); + ERROR("Failed to chown terminal \"%s\"", terminal->name); + return -1; } - TRACE("Chowned terminal %d(%s)", terminal->pts, terminal->name); + TRACE("Chowned terminal \"%s\"", terminal->name); return 0; } + diff --git a/src/lxc/terminal.h b/src/lxc/terminal.h index 4d21f33d9..9de4cd055 100644 --- a/src/lxc/terminal.h +++ b/src/lxc/terminal.h @@ -15,14 +15,14 @@ struct lxc_conf; struct lxc_epoll_descr; struct lxc_terminal_info { - /* the path name of the pts side */ + /* the path name of the slave side */ char name[PATH_MAX]; - /* the file descriptor of the ptmx */ - int ptmx; + /* the file descriptor of the master */ + int master; - /* the file descriptor of the pts */ - int pts; + /* the file descriptor of the slave */ + int slave; /* whether the terminal is currently used */ int busy; @@ -32,7 +32,7 @@ struct lxc_terminal_state { struct lxc_list node; int stdinfd; int stdoutfd; - int ptmxfd; + int masterfd; /* Escape sequence to use for exiting the terminal. A single char can * be specified. 
The terminal can then exited by doing: Ctrl + @@ -57,8 +57,8 @@ struct lxc_terminal_state { }; struct lxc_terminal { - int pts; - int ptmx; + int slave; + int master; int peer; struct lxc_terminal_info proxy; struct lxc_epoll_descr *descr; @@ -79,6 +79,16 @@ struct lxc_terminal { /* whether the log file will be rotated */ unsigned int log_rotate; +#ifdef HAVE_ISULAD + /* driver of log, support file and syslog */ + char *log_driver; + + /* syslog tag for every log */ + char *log_syslog_tag; + + /* syslog facility */ + int log_syslog_facility; +#endif }; struct /* lxc_terminal_ringbuf */ { @@ -88,7 +98,27 @@ struct lxc_terminal { /* the in-memory ringbuffer */ struct lxc_ringbuf ringbuf; }; +#ifdef HAVE_ISULAD + char *init_fifo[3]; /* isulad: default fifos for the start */ + struct lxc_list fifos; /* isulad: fifos used to forward teminal */ + bool disable_pty; + bool open_stdin; + int pipes[3][2]; /* isulad: pipes for dup to container fds of stdin,stdout,stderr on daemonize mode*/ +#endif +}; + +#ifdef HAVE_ISULAD +/* isulad: fifo struct */ +struct lxc_fifos_fd { + char *in_fifo; + char *out_fifo; + char *err_fifo; + int in_fd; + int out_fd; + int err_fd; + struct lxc_list node; }; +#endif /** * lxc_terminal_allocate: allocate the console or a tty @@ -102,10 +132,10 @@ extern int lxc_terminal_allocate(struct lxc_conf *conf, int sockfd, int *ttynum /** * Create a new terminal: - * - calls openpty() to allocate a ptmx/pts pair - * - sets the FD_CLOEXEC flag on the ptmx/pts fds + * - calls openpty() to allocate a master/slave pair + * - sets the FD_CLOEXEC flag on the master/slave fds * - allocates either the current controlling terminal (default) or a user - * specified terminal as proxy for the newly created ptmx/pts pair + * specified terminal as proxy for the newly created master/slave pair * - sets up SIGWINCH handler, winsz, and new terminal settings * (Handlers for SIGWINCH and I/O are not registered in a mainloop.) 
*/ @@ -164,7 +194,7 @@ extern int lxc_console(struct lxc_container *c, int ttynum, * the range specified by lxc.tty.max to allocate a specific tty. */ extern int lxc_terminal_getfd(struct lxc_container *c, int *ttynum, - int *ptmxfd); + int *masterfd); /** * Make fd a duplicate of the standard file descriptors. The fd is made a @@ -183,12 +213,12 @@ extern int lxc_terminal_stdin_cb(int fd, uint32_t events, void *cbdata, struct lxc_epoll_descr *descr); /** - * Handler for events on the ptmx fd of the terminal. To be registered via + * Handler for events on the master fd of the terminal. To be registered via * the corresponding functions declared and defined in mainloop.{c,h} or * lxc_terminal_mainloop_add(). * This function exits the loop cleanly when an EPOLLHUP event is received. */ -extern int lxc_terminal_ptmx_cb(int fd, uint32_t events, void *cbdata, +extern int lxc_terminal_master_cb(int fd, uint32_t events, void *cbdata, struct lxc_epoll_descr *descr); /** @@ -202,9 +232,9 @@ extern int lxc_setup_tios(int fd, struct termios *oldtios); * lxc_terminal_winsz: propagate winsz from one terminal to another * * @srcfd - * - terminal to get size from (typically a pts pty) + * - terminal to get size from (typically a slave pty) * @dstfd - * - terminal to set size on (typically a ptmx pty) + * - terminal to set size on (typically a master pty) */ extern void lxc_terminal_winsz(int srcfd, int dstfd); @@ -254,4 +284,9 @@ extern void lxc_terminal_init(struct lxc_terminal *terminal); extern int lxc_terminal_map_ids(struct lxc_conf *c, struct lxc_terminal *terminal); +#ifdef HAVE_ISULAD +int lxc_terminal_add_fifos(struct lxc_conf *conf, const char *fifonames); +int lxc_set_terminal_winsz(struct lxc_terminal *terminal, unsigned int height, unsigned int width); +#endif + #endif /* __LXC_TERMINAL_H */ diff --git a/src/lxc/tools/arguments.h b/src/lxc/tools/arguments.h index cb0ba744d..41ea1097a 100644 --- a/src/lxc/tools/arguments.h +++ b/src/lxc/tools/arguments.h @@ -40,6 
+40,16 @@ struct lxc_arguments { /* for lxc-start */ const char *share_ns[32]; /* size must be greater than LXC_NS_MAX */ +#ifdef HAVE_ISULAD + const char *container_info; /* isulad: file used to store pid and ppid info of container */ + char *terminal_fifos[3]; /* isulad add, fifos used to redirct stdin/out/err */ + const char *exit_monitor_fifo; /* isulad: fifo used to monitor state of monitor process */ + const char *suffix; /* isulad add, suffix used for connect with parent of execed process*/ + int disable_pty; + int open_stdin; + unsigned int start_timeout; /* isulad: Seconds for waiting on a container to start before it is killed*/ + int64_t attach_timeout; /* for lxc-attach */ +#endif /* for lxc-console */ unsigned int ttynum; @@ -152,6 +162,19 @@ struct lxc_arguments { #define OPT_SHARE_UTS OPT_USAGE - 5 #define OPT_SHARE_PID OPT_USAGE - 6 +#ifdef HAVE_ISULAD +#define OPT_INPUT_FIFO OPT_USAGE - 7 +#define OPT_OUTPUT_FIFO OPT_USAGE - 8 +#define OPT_STDERR_FIFO OPT_USAGE - 9 +#define OPT_CONTAINER_INFO OPT_USAGE - 10 +#define OPT_EXIT_FIFO OPT_USAGE - 11 +#define OPT_START_TIMEOUT OPT_USAGE - 12 +#define OPT_DISABLE_PTY OPT_USAGE - 13 +#define OPT_OPEN_STDIN OPT_USAGE - 14 +#define OPT_ATTACH_TIMEOUT OPT_USAGE - 15 +#define OPT_ATTACH_SUFFIX OPT_USAGE - 16 +#endif + extern int lxc_arguments_parse(struct lxc_arguments *args, int argc, char *const argv[]); diff --git a/src/lxc/tools/lxc_attach.c b/src/lxc/tools/lxc_attach.c index a8f493aa7..dbddc2a51 100644 --- a/src/lxc/tools/lxc_attach.c +++ b/src/lxc/tools/lxc_attach.c @@ -72,8 +72,19 @@ static const struct option my_longopts[] = { {"set-var", required_argument, 0, 'v'}, {"pty-log", required_argument, 0, 'L'}, {"rcfile", required_argument, 0, 'f'}, +#ifndef HAVE_ISULAD {"uid", required_argument, 0, 'u'}, {"gid", required_argument, 0, 'g'}, +#else + {"user", required_argument, 0, 'u'}, + {"in-fifo", required_argument, 0, OPT_INPUT_FIFO}, /* isulad add terminal fifos*/ + {"out-fifo", required_argument, 0, 
OPT_OUTPUT_FIFO}, + {"err-fifo", required_argument, 0, OPT_STDERR_FIFO}, + {"suffix", required_argument, 0, OPT_ATTACH_SUFFIX}, + {"timeout", required_argument, 0, OPT_ATTACH_TIMEOUT}, + {"disable-pty", no_argument, 0, OPT_DISABLE_PTY}, + {"open-stdin", no_argument, 0, OPT_OPEN_STDIN}, +#endif LXC_COMMON_OPTIONS }; @@ -124,9 +135,19 @@ Options :\n\ multiple times.\n\ -f, --rcfile=FILE\n\ Load configuration file FILE\n\ +" +#ifndef HAVE_ISULAD +"\ -u, --uid=UID Execute COMMAND with UID inside the container\n\ -g, --gid=GID Execute COMMAND with GID inside the container\n\ -", +" +#else +"\ + --user User ID (format: UID[:GID])\n\ + --timeout Timeout in seconds (default: 0)\n\ +" +#endif +, .options = my_longopts, .parser = my_parser, .checker = NULL, @@ -136,6 +157,71 @@ Options :\n\ .gid = LXC_INVALID_GID, }; +#ifdef HAVE_ISULAD +static int parse_user_id(const char *username, char **uid, char **gid, char **tmp_dup) +{ + char *tmp = NULL; + char *pdot = NULL; + + if (uid == NULL || gid == NULL || tmp_dup == NULL) { + return -1; + } + + if (username != NULL) { + tmp = strdup(username); + if (tmp == NULL) { + ERROR("Failed to duplicate user name"); + return -1; + } + + // for free tmp in caller + *tmp_dup = tmp; + pdot = strstr(tmp, ":"); + if (pdot != NULL) { + *pdot = '\0'; + if (pdot != tmp) { + // uid found + *uid = tmp; + } + + if (*(pdot + 1) != '\0') { + // gid found + *gid = pdot + 1; + } + } else { + // No : found + if (*tmp != '\0') { + *uid = tmp; + } + } + } + + return 0; +} + +static int get_attach_uid_gid(const char *username, uid_t *user_id, gid_t *group_id) +{ + char *tmp = NULL; + char *uid = NULL; + char *gid = NULL; + + // parse uid and gid by username + if (parse_user_id(username, &uid, &gid, &tmp) != 0) { + return -1; + } + + if (uid != NULL) { + *user_id = (unsigned int)atoll(uid); + } + if (gid != NULL) { + *group_id = (unsigned int)atoll(gid); + } + + free(tmp); + return 0; +} +#endif + static int my_parser(struct lxc_arguments *args, int c, char 
*arg) { int ret; @@ -193,6 +279,7 @@ static int my_parser(struct lxc_arguments *args, int c, char *arg) case 'f': args->rcfile = arg; break; +#ifndef HAVE_ISULAD case 'u': if (lxc_safe_uint(arg, &args->uid) < 0) return -1; @@ -201,8 +288,40 @@ static int my_parser(struct lxc_arguments *args, int c, char *arg) if (lxc_safe_uint(arg, &args->gid) < 0) return -1; break; +#else + case 'u': + if (get_attach_uid_gid(arg, &args->uid, &args->gid) != 0) { + ERROR("Failed to get attach user U/GID"); + return -1; + } + break; + case OPT_INPUT_FIFO: + args->terminal_fifos[0] = arg; + break; + case OPT_OUTPUT_FIFO: + args->terminal_fifos[1] = arg; + break; + case OPT_STDERR_FIFO: + args->terminal_fifos[2] = arg; + break; + case OPT_ATTACH_SUFFIX: + args->suffix = arg; + break; + case OPT_ATTACH_TIMEOUT: + if(!is_non_negative_num(arg)) { + ERROR("Error attach timeout parameter:%s.\n", arg); + return -1; + } + args->attach_timeout = (unsigned int)atoll(arg); + break; + case OPT_DISABLE_PTY: + args->disable_pty = 1; + break; + case OPT_OPEN_STDIN: + args->open_stdin = 1; + break; +#endif } - return 0; } @@ -264,6 +383,281 @@ static int lxc_attach_create_log_file(const char *log_file) return fd; } +#ifdef HAVE_ISULAD +// isulad: send '128 + signal' if container is killed by signal. 
+#define EXIT_SIGNAL_OFFSET 128 + +/*isulad: attach with terminal*/ +static int do_attach_foreground(struct lxc_container *c, lxc_attach_command_t *command, + lxc_attach_options_t *attach_options, + char **errmsg) +{ + int ret = 0; + pid_t pid; + int wexit = -1; + int signal; + + if (command->program) + ret = c->attach(c, lxc_attach_run_command, command, attach_options, &pid); + else + ret = c->attach(c, lxc_attach_run_shell, NULL, attach_options, &pid); + if (ret < 0) { + *errmsg = safe_strdup("Internal error, failed to call attach"); + goto out; + } + + ret = lxc_wait_for_pid_status(pid); + if (ret < 0) { + free(*errmsg); + *errmsg = safe_strdup("Internal error, failed to wait attached process"); + goto out; + } + + if (WIFEXITED(ret)) + wexit = WEXITSTATUS(ret); + else + wexit = -1; + + if (WIFSIGNALED(ret)) { + signal = WTERMSIG(ret); + wexit = EXIT_SIGNAL_OFFSET + signal; + } + + ERROR("Execd pid %d exit with %d", pid, wexit); + +out: + if (c->lxc_conf->errmsg) { + free(*errmsg); + *errmsg = safe_strdup(c->lxc_conf->errmsg); + } + return wexit; +} + +static void close_msg_pipe(int *errpipe) +{ + if (errpipe[0] >= 0) { + close(errpipe[0]); + errpipe[0] = -1; + } + if (errpipe[1] >= 0) { + close(errpipe[1]); + errpipe[1] = -1; + } +} + +/*isulad: attach without terminal in background */ +static int do_attach_background(struct lxc_container *c, lxc_attach_command_t *command, + lxc_attach_options_t *attach_options, + char **errmsg) +{ + int ret = 0; + int msgpipe[2]; + pid_t pid = 0; + ssize_t size_read; + char msgbuf[BUFSIZ + 1] = {0}; + + //pipdfd for get error message of child or grandchild process. 
+ if (pipe2(msgpipe, O_CLOEXEC) != 0) { + SYSERROR("Failed to init msgpipe"); + return -1; + } + + pid = fork(); + if (pid < 0) { + close_msg_pipe(msgpipe); + return -1; + } + + if (pid != 0) { + close(msgpipe[1]); + msgpipe[1] = -1; + size_read = read(msgpipe[0], msgbuf, BUFSIZ); + if (size_read > 0) { + *errmsg = safe_strdup(msgbuf); + ret = -1; + } + + close(msgpipe[0]); + msgpipe[0] = -1; + + return ret; + } + + /* second fork to be reparented by init */ + pid = fork(); + if (pid < 0) { + SYSERROR("Error doing dual-fork"); + close_msg_pipe(msgpipe); + exit(1); + } + if (pid != 0) { + close_msg_pipe(msgpipe); + exit(0); + } + + close(msgpipe[0]); + msgpipe[0] = -1; + + if (null_stdfds() < 0) { + ERROR("failed to close fds"); + exit(1); + } + setsid(); + + if (command->program) + ret = c->attach(c, lxc_attach_run_command, command, attach_options, &pid); + else + ret = c->attach(c, lxc_attach_run_shell, NULL, attach_options, &pid); + if (ret < 0) { + if (c->lxc_conf->errmsg) + lxc_write_error_message(msgpipe[1], "%s", c->lxc_conf->errmsg); + else + lxc_write_error_message(msgpipe[1], "Failed to attach container"); + close(msgpipe[1]); + msgpipe[1] = -1; + ret = -1; + goto out; + } + + close(msgpipe[1]); + msgpipe[1] = -1; + + ret = wait_for_pid(pid); +out: + lxc_container_put(c); + if (ret) + exit(EXIT_FAILURE); + else + exit(0); +} + +int main(int argc, char *argv[]) +{ + int wexit = 0; + struct lxc_log log; + char *errmsg = NULL; + lxc_attach_options_t attach_options = LXC_ATTACH_OPTIONS_DEFAULT; + lxc_attach_command_t command = (lxc_attach_command_t){.program = NULL}; + + if (lxc_caps_init()) + exit(EXIT_FAILURE); + + if (lxc_arguments_parse(&my_args, argc, argv)) + exit(EXIT_FAILURE); + + log.name = my_args.name; + log.file = my_args.log_file; + log.level = my_args.log_priority; + log.prefix = my_args.progname; + log.quiet = my_args.quiet; + log.lxcpath = my_args.lxcpath[0]; + + if (lxc_log_init(&log)) + exit(EXIT_FAILURE); + + if (geteuid()) + if 
(access(my_args.lxcpath[0], O_RDONLY) < 0) { + ERROR("You lack access to %s", my_args.lxcpath[0]); + exit(EXIT_FAILURE); + } + + struct lxc_container *c = lxc_container_new(my_args.name, my_args.lxcpath[0]); + if (!c) + exit(EXIT_FAILURE); + + if (my_args.rcfile) { + c->clear_config(c); + if (!c->load_config(c, my_args.rcfile)) { + ERROR("Failed to load rcfile"); + lxc_container_put(c); + exit(EXIT_FAILURE); + } + + c->configfile = strdup(my_args.rcfile); + if (!c->configfile) { + ERROR("Out of memory setting new config filename"); + lxc_container_put(c); + exit(EXIT_FAILURE); + } + } + + if (!c->may_control(c)) { + ERROR("Insufficent privileges to control %s", c->name); + lxc_container_put(c); + exit(EXIT_FAILURE); + } + + if (remount_sys_proc) + attach_options.attach_flags |= LXC_ATTACH_REMOUNT_PROC_SYS; + + if (elevated_privileges) + attach_options.attach_flags &= ~(elevated_privileges); + + if (my_args.terminal_fifos[0] || my_args.terminal_fifos[1] || my_args.terminal_fifos[2]) { + attach_options.init_fifo[0] = my_args.terminal_fifos[0]; + attach_options.init_fifo[1] = my_args.terminal_fifos[1]; + attach_options.init_fifo[2] = my_args.terminal_fifos[2]; + attach_options.attach_flags |= LXC_ATTACH_TERMINAL; + } else if (stdfd_is_pty()) { + attach_options.attach_flags |= LXC_ATTACH_TERMINAL; + } + + attach_options.namespaces = namespace_flags; + attach_options.personality = new_personality; + attach_options.env_policy = env_policy; + attach_options.extra_env_vars = extra_env; + attach_options.extra_keep_env = extra_keep; + attach_options.timeout = my_args.attach_timeout; + + if (my_args.argc > 0) { + command.program = my_args.argv[0]; + command.argv = (char**)my_args.argv; + } + + if (my_args.console_log) { + attach_options.log_fd = lxc_attach_create_log_file(my_args.console_log); + if (attach_options.log_fd < 0) { + ERROR("Failed to create log file for %s", c->name); + lxc_container_put(c); + exit(EXIT_FAILURE); + } + } + + if (my_args.uid != LXC_INVALID_UID) + 
attach_options.uid = my_args.uid; + + if (my_args.gid != LXC_INVALID_GID) + attach_options.gid = my_args.gid; + + attach_options.suffix = my_args.suffix; + + if (my_args.disable_pty) { + attach_options.disable_pty = true; + } + + if (my_args.open_stdin) { + attach_options.open_stdin = true; + } + + /* isulad: add do attach background */ + if (attach_options.attach_flags & LXC_ATTACH_TERMINAL) + wexit = do_attach_foreground(c, &command, &attach_options, &errmsg); + else + wexit = do_attach_background(c, &command, &attach_options, &errmsg); + + if (errmsg) { + fprintf(stderr, "%s:%s:%s:%d starting container process caused \"%s\"", c->name, + __FILE__, __func__, __LINE__, errmsg); + free(errmsg); + } + + lxc_container_put(c); + if (wexit >= 0) + exit(wexit); + + exit(EXIT_FAILURE); +} +#else int main(int argc, char *argv[]) { int ret = -1; @@ -377,3 +771,4 @@ out: exit(EXIT_FAILURE); } +#endif diff --git a/src/lxc/tools/lxc_ls.c b/src/lxc/tools/lxc_ls.c index 0abcd7a63..e601f9d70 100644 --- a/src/lxc/tools/lxc_ls.c +++ b/src/lxc/tools/lxc_ls.c @@ -106,7 +106,11 @@ struct wrapargs { /* * Takes struct wrapargs as argument. 
*/ +#ifdef HAVE_ISULAD +static int ls_get_wrapper(void *wrap, int msgfd); +#else static int ls_get_wrapper(void *wrap); +#endif /* * To calculate swap usage we should not simply check memory.usage_in_bytes and @@ -1005,7 +1009,11 @@ static int my_parser(struct lxc_arguments *args, int c, char *arg) return 0; } +#ifdef HAVE_ISULAD +static int ls_get_wrapper(void *wrap, int msgfd) +#else static int ls_get_wrapper(void *wrap) +#endif { int ret = -1; size_t len = 0; @@ -1166,9 +1174,6 @@ static int ls_recv_str(int fd, char **buf) if (ret != sizeof(slen)) return -1; - if (slen == SIZE_MAX) - return -1; - if (slen > 0) { *buf = malloc(sizeof(char) * (slen + 1)); if (!*buf) @@ -1180,11 +1185,6 @@ static int ls_recv_str(int fd, char **buf) return -1; } - if (slen == SIZE_MAX) { - free(*buf); - return -1; - } - (*buf)[slen] = '\0'; } diff --git a/src/lxc/tools/lxc_start.c b/src/lxc/tools/lxc_start.c index 459b86793..4f2c8afa7 100644 --- a/src/lxc/tools/lxc_start.c +++ b/src/lxc/tools/lxc_start.c @@ -28,6 +28,11 @@ #include "confile.h" #include "log.h" +#ifdef HAVE_ISULAD +#include +#include "isulad_utils.h" +#endif + lxc_log_define(lxc_start, lxc); static int my_parser(struct lxc_arguments *args, int c, char *arg); @@ -48,6 +53,17 @@ static const struct option my_longopts[] = { {"share-ipc", required_argument, 0, OPT_SHARE_IPC}, {"share-uts", required_argument, 0, OPT_SHARE_UTS}, {"share-pid", required_argument, 0, OPT_SHARE_PID}, +#ifdef HAVE_ISULAD + {"in-fifo", required_argument, 0, OPT_INPUT_FIFO}, + {"out-fifo", required_argument, 0, OPT_OUTPUT_FIFO}, + {"err-fifo", required_argument, 0, OPT_STDERR_FIFO}, + {"container-pidfile", required_argument, 0, OPT_CONTAINER_INFO}, + {"exit-fifo", required_argument, 0, OPT_EXIT_FIFO}, + {"start-timeout", required_argument, 0, OPT_START_TIMEOUT}, + {"disable-pty", no_argument, 0, OPT_DISABLE_PTY}, + {"open-stdin", no_argument, 0, OPT_OPEN_STDIN}, + {"start-timeout", required_argument, 0, OPT_START_TIMEOUT}, +#endif 
LXC_COMMON_OPTIONS }; @@ -118,6 +134,38 @@ static int my_parser(struct lxc_arguments *args, int c, char *arg) case OPT_SHARE_PID: args->share_ns[LXC_NS_PID] = arg; break; + +#ifdef HAVE_ISULAD + case OPT_CONTAINER_INFO: + args->container_info = arg; + break; + case OPT_INPUT_FIFO: + args->terminal_fifos[0] = arg; + break; + case OPT_OUTPUT_FIFO: + args->terminal_fifos[1] = arg; + break; + case OPT_STDERR_FIFO: + args->terminal_fifos[2] = arg; + break; + case OPT_EXIT_FIFO: + args->exit_monitor_fifo = arg; + break; + case OPT_DISABLE_PTY: + args->disable_pty = 1; + break; + case OPT_OPEN_STDIN: + args->open_stdin = 1; + break; + case OPT_START_TIMEOUT: + if(!is_non_negative_num(arg)) { + fprintf(stderr, "Error start timeout parameter:%s.\n", arg); + return -1; + } + args->start_timeout = (unsigned int)atoi(arg); + break; +#endif + } return 0; } @@ -163,6 +211,9 @@ int main(int argc, char *argv[]) "/sbin/init", NULL, }; +#ifdef HAVE_ISULAD + char *container_info_file = NULL; +#endif lxc_list_init(&defines); @@ -283,6 +334,42 @@ int main(int argc, char *argv[]) goto out; } +#ifdef HAVE_ISULAD + /* isulad: container info file used to store pid and ppid info of container*/ + if (my_args.container_info != NULL) { + if (ensure_path(&container_info_file, my_args.container_info) < 0) { + ERROR("Failed to ensure container's piddile '%s'", my_args.container_info); + goto out; + } + if (!c->set_container_info_file(c, container_info_file)) { + ERROR("Failed to set container's piddile '%s'", container_info_file); + goto out; + } + } + + if (my_args.terminal_fifos[0] || my_args.terminal_fifos[1] || my_args.terminal_fifos[2]) { + c->set_terminal_init_fifos(c, my_args.terminal_fifos[0], my_args.terminal_fifos[1], my_args.terminal_fifos[2]); + } + + /* isulad: fifo used to monitor state of monitor process */ + if (my_args.exit_monitor_fifo != NULL) { + c->exit_fifo = safe_strdup(my_args.exit_monitor_fifo); + } + + if (my_args.disable_pty) { + c->want_disable_pty(c, true); + } + + if 
(my_args.open_stdin) { + c->want_open_stdin(c, true); + } + + /* isulad: add start timeout */ + if(my_args.start_timeout) { + c->set_start_timeout(c, my_args.start_timeout); + } +#endif + if (my_args.console) if (!c->set_config_item(c, "lxc.console.path", my_args.console)) goto out; @@ -305,6 +392,11 @@ int main(int argc, char *argv[]) else err = c->start(c, 0, args) ? EXIT_SUCCESS : EXIT_FAILURE; if (err) { +#ifdef HAVE_ISULAD + if (c->lxc_conf->errmsg) + fprintf(stderr, "%s:%s:%s:%d starting container process caused \"%s\"", c->name, + __FILE__, __func__, __LINE__, c->lxc_conf->errmsg); +#endif ERROR("The container failed to start"); if (my_args.daemonize) @@ -320,5 +412,8 @@ int main(int argc, char *argv[]) out: lxc_container_put(c); +#ifdef HAVE_ISULAD + free(container_info_file); +#endif exit(err); } diff --git a/src/lxc/utils.c b/src/lxc/utils.c index 88d0f85ee..4e418fbb9 100644 --- a/src/lxc/utils.c +++ b/src/lxc/utils.c @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include "config.h" #include "log.h" @@ -35,7 +37,7 @@ #include "memory_utils.h" #include "namespace.h" #include "parse.h" -#include "process_utils.h" +#include "raw_syscalls.h" #include "syscall_wrappers.h" #include "utils.h" @@ -71,6 +73,9 @@ static int _recursive_rmdir(const char *dirname, dev_t pdev, int ret; struct dirent *direntp; char pathname[PATH_MAX]; +#ifdef HAVE_ISULAD + int saved_errno = 0; +#endif dir = opendir(dirname); if (!dir) @@ -133,6 +138,11 @@ static int _recursive_rmdir(const char *dirname, dev_t pdev, } else { ret = unlink(pathname); if (ret < 0) { +#ifdef HAVE_ISULAD + if (saved_errno == 0) { + saved_errno = errno; + } +#endif __do_close int fd = -EBADF; fd = open(pathname, O_RDONLY | O_CLOEXEC | O_NONBLOCK); @@ -158,10 +168,18 @@ static int _recursive_rmdir(const char *dirname, dev_t pdev, } if (rmdir(dirname) < 0 && !btrfs_try_remove_subvol(dirname) && !hadexclude) { +#ifdef HAVE_ISULAD + if (saved_errno == 0) { + saved_errno = errno; + } +#endif 
SYSERROR("Failed to delete \"%s\"", dirname); failed = 1; } +#ifdef HAVE_ISULAD + errno = saved_errno; +#endif return failed ? -1 : 0; } @@ -1008,7 +1026,7 @@ static int open_if_safe(int dirfd, const char *nextpath) * * Return an open fd for the path, or <0 on error. */ -static int open_without_symlink(const char *target, const char *prefix_skip) +int open_without_symlink(const char *target, const char *prefix_skip) { int curlen = 0, dirfd, fulllen, i; char *dup; @@ -1425,6 +1443,11 @@ static int lxc_get_unused_loop_dev(char *name_loop) { int loop_nr, ret; int fd_ctl = -1, fd_tmp = -1; +#if HAVE_ISULAD + // isulad: retry and try mknod + int max_retry = 200; + bool try_mknod = true; +#endif fd_ctl = open("/dev/loop-control", O_RDWR | O_CLOEXEC); if (fd_ctl < 0) { @@ -1442,8 +1465,37 @@ static int lxc_get_unused_loop_dev(char *name_loop) if (ret < 0 || ret >= LO_NAME_SIZE) goto on_error; +#if HAVE_ISULAD +retry: +#endif fd_tmp = open(name_loop, O_RDWR | O_CLOEXEC); if (fd_tmp < 0) { +#if HAVE_ISULAD + /* Success of LOOP_CTL_GET_FREE doesn't mean /dev/loop$i is ready, + * so we try to make the node ourselves to avoid waiting. */ + if (try_mknod) { + /* Do not check the result of mknod because LOOP_CTL_GET_FREE + * also does a mknod, so this mknod may fail as the node already + * exists. If we can open the node without error, we can + * say that it has been created successfully. + * + * note: 7 is the major device number of loopback devices + * in kernel. + */ + mknod(name_loop, S_IFBLK | 0640, makedev(7, loop_nr)); + try_mknod = false; + goto retry; + } + /* we need to wait some time to make sure it's ready for open if + * it can't be opened even after we have already tried to make the node ourselves.
*/ + if (max_retry > 0) { + max_retry--; + usleep(5000); /* 5 millisecond */ + goto retry; + } + SYSERROR("Failed to open loop \"%s\"", name_loop); + goto on_error; +#else /* on Android loop devices are moved under /dev/block, give it a shot */ ret = snprintf(name_loop, LO_NAME_SIZE, "/dev/block/loop%d", loop_nr); if (ret < 0 || ret >= LO_NAME_SIZE) @@ -1452,6 +1504,7 @@ static int lxc_get_unused_loop_dev(char *name_loop) fd_tmp = open(name_loop, O_RDWR | O_CLOEXEC); if (fd_tmp < 0) SYSERROR("Failed to open loop \"%s\"", name_loop); +#endif } on_error: @@ -1661,6 +1714,7 @@ uint64_t lxc_find_next_power2(uint64_t n) return n; } +#ifndef HAVE_ISULAD static int process_dead(/* takes */ int status_fd) { __do_close int dupfd = -EBADF; @@ -1698,15 +1752,19 @@ static int process_dead(/* takes */ int status_fd) return ret; } +#endif int lxc_set_death_signal(int signal, pid_t parent, int parent_status_fd) { int ret; +#ifndef HAVE_ISULAD pid_t ppid; +#endif ret = prctl(PR_SET_PDEATHSIG, prctl_arg(signal), prctl_arg(0), prctl_arg(0), prctl_arg(0)); +#ifndef HAVE_ISULAD /* verify that we haven't been orphaned in the meantime */ ppid = (pid_t)syscall(SYS_getppid); if (ppid == 0) { /* parent outside our pidns */ @@ -1718,6 +1776,7 @@ int lxc_set_death_signal(int signal, pid_t parent, int parent_status_fd) } else if (ppid != parent) { return raise(SIGKILL); } +#endif if (ret < 0) return -1; @@ -1755,8 +1814,13 @@ int lxc_rm_rf(const char *dirname) struct dirent *direntp; dir = opendir(dirname); - if (!dir) + if (!dir) { + if (errno == ENOENT) { + WARN("Destroy path: \"%s\" do not exist", dirname); + return 0; + } return log_error_errno(-1, errno, "Failed to open dir \"%s\"", dirname); + } while ((direntp = readdir(dir))) { __do_free char *pathname = NULL; @@ -1904,3 +1968,230 @@ int fix_stdio_permissions(uid_t uid) return fret; } + +#ifdef HAVE_ISULAD +/* isulad: write error message */ +void lxc_write_error_message(int errfd, const char *format, ...) 
+{ + int ret; + char errbuf[BUFSIZ + 1] = {0}; + ssize_t sret; + va_list argp; + + if (errfd <= 0) + return; + + va_start(argp, format); + ret = vsnprintf(errbuf, BUFSIZ, format, argp); + va_end(argp); + if (ret < 0 || ret >= BUFSIZ) + SYSERROR("Failed to call vsnprintf"); + sret = write(errfd, errbuf, strlen(errbuf)); + if (sret < 0) + SYSERROR("Write errbuf failed"); +} + +/* isulad: read file to buffer */ +int lxc_file2str(const char *filename, char ret[], int cap) +{ + int fd, num_read; + + if ((fd = lxc_open(filename, O_RDONLY | O_CLOEXEC, 0)) == -1) + return -1; + if ((num_read = read(fd, ret, cap - 1)) <= 0) + num_read = -1; + else + ret[num_read] = 0; + close(fd); + + return num_read; +} + +/* isulad: lxc_stat2proc() makes sure it can handle arbitrary executable file basenames + * for `cmd', i.e. those with embedded whitespace or embedded ')'s. + * Such names confuse %s (see scanf(3)), so the string is split and %15c + * is used instead. (Except for embedded ')'s, "(%[^)]c)" would work.) + */ +static proc_t *lxc_stat2proc(const char *S) +{ + int num; + proc_t *P = NULL; + char *tmp = NULL; + + if (!S) + return NULL; + + tmp = strrchr(S, ')'); /* split into "PID (cmd" and "<rest>" */ + if (!tmp) + return NULL; + *tmp = '\0'; /* replace trailing ')' with NUL */ + + P = malloc(sizeof(proc_t)); + if (P == NULL) + return NULL; + (void)memset(P, 0x00, sizeof(proc_t)); + + /* parse these two strings separately, skipping the leading "(". */ + num = sscanf(S, "%d (%15c", &P->pid, P->cmd); /* comm[16] in kernel */ + if (num != 2) { + ERROR("Call sscanf error: %s", errno ?
strerror(errno) : ""); + free(P); + return NULL; + } + num = sscanf(tmp + 2, /* skip space after ')' too */ + "%c " + "%d %d %d %d %d " + "%lu %lu %lu %lu %lu " + "%Lu %Lu %Lu %Lu " /* utime stime cutime cstime */ + "%ld %ld %ld %ld " + "%Lu " /* start_time */ + "%lu " + "%ld " + "%lu %lu %lu %lu %lu %lu " + "%*s %*s %*s %*s " /* discard, no RT signals & Linux 2.1 used hex */ + "%lu %lu %lu " + "%d %d " + "%lu %lu", + &P->state, + &P->ppid, &P->pgrp, &P->session, &P->tty, &P->tpgid, + &P->flags, &P->min_flt, &P->cmin_flt, &P->maj_flt, &P->cmaj_flt, + &P->utime, &P->stime, &P->cutime, &P->cstime, + &P->priority, &P->nice, &P->timeout, &P->it_real_value, + &P->start_time, + &P->vsize, + &P->rss, + &P->rss_rlim, &P->start_code, &P->end_code, &P->start_stack, &P->kstk_esp, + &P->kstk_eip, + &P->wchan, &P->nswap, &P->cnswap, + &P->exit_signal, &P->processor, /* 2.2.1 ends with "exit_signal" */ + &P->rtprio, &P->sched /* both added to 2.5.18 */ + ); + if (num != 35) { + ERROR("Call sscanf error: %s", errno ? strerror(errno) : ""); + free(P); + return NULL; + } + if (P->tty == 0) + P->tty = -1; /* the old notty val, update elsewhere bef. 
moving to 0 */ + return P; +} + +/* isulad: get starttime of process pid */ +unsigned long long lxc_get_process_startat(pid_t pid) +{ + int sret = 0; + unsigned long long startat = 0; + proc_t *pid_info = NULL; + char filename[PATH_MAX] = {0}; + char sbuf[1024] = {0}; /* bufs for stat */ + + sret = snprintf(filename, sizeof(filename), "/proc/%d/stat", pid); + if (sret < 0 || sret >= sizeof(filename)) { + ERROR("Failed to sprintf filename"); + goto out; + } + + if ((lxc_file2str(filename, sbuf, sizeof(sbuf))) == -1) { + SYSERROR("Failed to read pidfile %s", filename); + goto out; + } + + pid_info = lxc_stat2proc(sbuf); + if (!pid_info) { + ERROR("Failed to get proc stat info"); + goto out; + } + + startat = pid_info->start_time; +out: + free(pid_info); + return startat; +} + +// isulad: set env home in container +int lxc_setup_env_home(uid_t uid) +{ +#define __PASSWD_FILE__ "/etc/passwd" + char *homedir = "/"; // default home dir is / + FILE *stream = NULL; + struct passwd pw, *pwbufp = NULL; + char buf[BUFSIZ]; + + stream = fopen_cloexec(__PASSWD_FILE__, "r"); + if (stream == NULL) { + SYSWARN("Failed to open %s", __PASSWD_FILE__); + goto set_env; + } + + while (fgetpwent_r(stream, &pw, buf, sizeof(buf), &pwbufp) == 0 && pwbufp != NULL) { + if (pwbufp->pw_uid == uid) { + homedir = pwbufp->pw_dir; + goto set_env; + } + } + WARN("User invalid, can not find user '%u'", uid); + +set_env: + if (stream) + fclose(stream); + + // if we didn't configure HOME, set it based on uid + if (setenv("HOME", homedir, 0) < 0) { + SYSERROR("Unable to set env 'HOME'"); + return -1; + } + + NOTICE("Setted env 'HOME' to %s", homedir); + return 0; +} + +bool lxc_process_alive(pid_t pid, unsigned long long start_time) +{ + int sret = 0; + bool alive = true; + proc_t *pid_info = NULL; + char filename[PATH_MAX] = {0}; + char sbuf[1024] = {0}; /* bufs for stat */ + + sret = kill(pid, 0); + if (sret < 0 && errno == ESRCH) + return false; + + sret = snprintf(filename, sizeof(filename), 
"/proc/%d/stat", pid); + if (sret < 0 || sret >= sizeof(filename)) { + ERROR("Failed to sprintf filename"); + goto out; + } + + if ((lxc_file2str(filename, sbuf, sizeof(sbuf))) == -1) { + ERROR("Failed to read pidfile %s", filename); + alive = false; + goto out; + } + + pid_info = lxc_stat2proc(sbuf); + if (!pid_info) { + ERROR("Failed to get proc stat info"); + alive = false; + goto out; + } + + if (start_time != pid_info->start_time) + alive = false; +out: + free(pid_info); + return alive; +} + +bool is_non_negative_num(const char *s) +{ + if (!s || !strcmp(s, "")) + return false; + while(*s != '\0') { + if(!isdigit(*s)) + return false; + ++s; + } + return true; +} +#endif diff --git a/src/lxc/utils.h b/src/lxc/utils.h index cf2c04251..39ef5792f 100644 --- a/src/lxc/utils.h +++ b/src/lxc/utils.h @@ -25,9 +25,16 @@ #include "initutils.h" #include "macro.h" #include "memory_utils.h" -#include "process_utils.h" +#include "raw_syscalls.h" #include "string_utils.h" +#ifdef HAVE_ISULAD +#include "isulad_utils.h" + +/* isulad: replace space with SPACE_MAGIC_STR */ +#define SPACE_MAGIC_STR "[#)" +#endif + /* returns 1 on success, 0 if there were any failures */ extern int lxc_rmdir_onedev(const char *path, const char *exclude); extern int get_u16(unsigned short *val, const char *arg, int base); @@ -41,6 +48,73 @@ extern char *get_rundir(void); #endif #endif +#ifdef HAVE_ISULAD +/* isulad: + ld cutime, cstime, priority, nice, timeout, it_real_value, rss, + c state, + d ppid, pgrp, session, tty, tpgid, + s signal, blocked, sigignore, sigcatch, + lu flags, min_flt, cmin_flt, maj_flt, cmaj_flt, utime, stime, + lu rss_rlim, start_code, end_code, start_stack, kstk_esp, kstk_eip, + lu start_time, vsize, wchan, nswap, cnswap, +*/ + +/* Basic data structure which holds all information we can get about a process. 
+ * (unless otherwise specified, fields are read from /proc/#/stat) + * + * Most of it comes from task_struct in linux/sched.h + */ +typedef struct proc_t { + // 1st 16 bytes + int pid; /* process id */ + int ppid; /* pid of parent process */ + + char state; /* single-char code for process state (S=sleeping) */ + + unsigned long long + utime, /* user-mode CPU time accumulated by process */ + stime, /* kernel-mode CPU time accumulated by process */ + // and so on... + cutime, /* cumulative utime of process and reaped children */ + cstime, /* cumulative stime of process and reaped children */ + start_time; /* start time of process -- seconds since 1-1-70 */ + + long + priority, /* kernel scheduling priority */ + timeout, /* ? */ + nice, /* standard unix nice level of process */ + rss, /* resident set size from /proc/#/stat (pages) */ + it_real_value; /* ? */ + unsigned long + rtprio, /* real-time priority */ + sched, /* scheduling class */ + vsize, /* number of pages of virtual memory ... */ + rss_rlim, /* resident set size limit? */ + flags, /* kernel flags for the process */ + min_flt, /* number of minor page faults since process start */ + maj_flt, /* number of major page faults since process start */ + cmin_flt, /* cumulative min_flt of process and child processes */ + cmaj_flt, /* cumulative maj_flt of process and child processes */ + nswap, /* ? */ + cnswap, /* cumulative nswap ? 
*/ + start_code, /* address of beginning of code segment */ + end_code, /* address of end of code segment */ + start_stack, /* address of the bottom of stack for the process */ + kstk_esp, /* kernel stack pointer */ + kstk_eip, /* kernel instruction pointer */ + wchan; /* address of kernel wait channel proc is sleeping in */ + + char cmd[16]; /* basename of executable file in call to exec(2) */ + int + pgrp, /* process group id */ + session, /* session id */ + tty, /* full device number of controlling terminal */ + tpgid, /* terminal process group id */ + exit_signal, /* might not be SIGCHLD */ + processor; /* current (or most recent?) CPU */ +} proc_t; +#endif + static inline int lxc_set_cloexec(int fd) { return fcntl(fd, F_SETFD, FD_CLOEXEC); @@ -145,6 +219,7 @@ extern bool cgns_supported(void); extern char *choose_init(const char *rootfs); extern bool switch_to_ns(pid_t pid, const char *ns); extern char *get_template_path(const char *t); +extern int open_without_symlink(const char *target, const char *prefix_skip); extern int safe_mount(const char *src, const char *dest, const char *fstype, unsigned long flags, const void *data, const char *rootfs); @@ -241,14 +316,16 @@ extern bool lxc_can_use_pidfd(int pidfd); extern int fix_stdio_permissions(uid_t uid); -static inline bool uid_valid(uid_t uid) -{ - return uid != LXC_INVALID_UID; -} +#ifdef HAVE_ISULAD +extern void lxc_write_error_message(int errfd, const char *format, ...); +extern int lxc_file2str(const char *filename, char ret[], int cap); +extern int unsigned long long lxc_get_process_startat(pid_t pid); +// set env home in container +extern int lxc_setup_env_home(uid_t uid); -static inline bool gid_valid(gid_t gid) -{ - return gid != LXC_INVALID_GID; -} +extern bool lxc_process_alive(pid_t pid, unsigned long long start_time); + +extern bool is_non_negative_num(const char *s); +#endif #endif /* __LXC_UTILS_H */ diff --git a/src/lxc/uuid.c b/src/lxc/uuid.c index 256225b8f..a5d24bbcb 100644 --- 
a/src/lxc/uuid.c +++ b/src/lxc/uuid.c @@ -116,7 +116,7 @@ int lxc_id128_write_fd(int fd, lxc_id128_t id) int lxc_id128_write(const char *p, lxc_id128_t id) { - __do_close int fd = -EBADF; + int fd = -1; fd = open(p, O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY|O_TRUNC, 0444); if (fd < 0) diff --git a/src/tests/Makefile.am b/src/tests/Makefile.am index 11bba260a..59905d326 100644 --- a/src/tests/Makefile.am +++ b/src/tests/Makefile.am @@ -30,7 +30,7 @@ lxc_test_parse_config_file_SOURCES = parse_config_file.c \ lxc_test_raw_clone_SOURCES = lxc_raw_clone.c \ lxctest.h \ ../lxc/namespace.c ../lxc/namespace.h \ - ../lxc/process_utils.c ../lxc/process_utils.h + ../lxc/raw_syscalls.c ../lxc/raw_syscalls.h ../lxc/utils.c ../lxc/utils.h lxc_test_reboot_SOURCES = reboot.c lxc_test_saveconfig_SOURCES = saveconfig.c @@ -58,6 +58,10 @@ AM_CFLAGS=-DLXCROOTFSMOUNT=\"$(LXCROOTFSMOUNT)\" \ -I $(top_srcdir)/src/lxc/tools \ -pthread +if HAVE_ISULAD +AM_CFLAGS += -I $(top_srcdir)/src/lxc/json +endif + if ENABLE_APPARMOR AM_CFLAGS += -DHAVE_APPARMOR endif @@ -114,8 +118,7 @@ bin_SCRIPTS += lxc-test-automount \ lxc-test-createconfig \ lxc-test-exit-code \ lxc-test-no-new-privs \ - lxc-test-rootfs \ - lxc-test-usernsexec + lxc-test-rootfs if DISTRO_UBUNTU bin_SCRIPTS += lxc-test-lxc-attach \ @@ -164,7 +167,6 @@ EXTRA_DIST = basic.c \ lxc-test-snapdeps \ lxc-test-symlink \ lxc-test-unpriv \ - lxc-test-usernsexec \ lxc-test-utils.c \ may_control.c \ mount_injection.c \ diff --git a/src/tests/attach.c b/src/tests/attach.c index 07e641d56..acb4c89f4 100644 --- a/src/tests/attach.c +++ b/src/tests/attach.c @@ -29,6 +29,7 @@ #include "lxctest.h" #include "utils.h" #include "lsm/lsm.h" +#include "config.h" #include @@ -76,7 +77,11 @@ static void test_attach_lsm_set_config(struct lxc_container *ct) ct->save_config(ct, NULL); } +#ifdef HAVE_ISULAD +static int test_attach_lsm_func_func(void* payload, int fd) +#else static int test_attach_lsm_func_func(void* payload) +#endif { TSTOUT("%s", 
lsm_process_label_get(syscall(SYS_getpid))); return 0; @@ -187,7 +192,11 @@ static int test_attach_lsm_func(struct lxc_container *ct) { return 0; } static int test_attach_lsm_cmd(struct lxc_container *ct) { return 0; } #endif /* HAVE_APPARMOR || HAVE_SELINUX */ +#ifdef HAVE_ISULAD +static int test_attach_func_func(void* payload, int fd) +#else static int test_attach_func_func(void* payload) +#endif { TSTOUT("%d", (int)syscall(SYS_getpid)); return 0; diff --git a/src/tests/console.c b/src/tests/console.c index c88f4329b..c0ad16033 100644 --- a/src/tests/console.c +++ b/src/tests/console.c @@ -37,14 +37,14 @@ } while (0) static void test_console_close_all(int ttyfd[MAXCONSOLES], - int ptmxfd[MAXCONSOLES]) + int masterfd[MAXCONSOLES]) { int i; for (i = 0; i < MAXCONSOLES; i++) { - if (ptmxfd[i] != -1) { - close(ptmxfd[i]); - ptmxfd[i] = -1; + if (masterfd[i] != -1) { + close(masterfd[i]); + masterfd[i] = -1; } if (ttyfd[i] != -1) { @@ -59,14 +59,14 @@ static int test_console_running_container(struct lxc_container *c) int nrconsoles, i, ret = -1; int ttynum [MAXCONSOLES]; int ttyfd [MAXCONSOLES]; - int ptmxfd[MAXCONSOLES]; + int masterfd[MAXCONSOLES]; for (i = 0; i < MAXCONSOLES; i++) - ttynum[i] = ttyfd[i] = ptmxfd[i] = -1; + ttynum[i] = ttyfd[i] = masterfd[i] = -1; ttynum[0] = 1; - ret = c->console_getfd(c, &ttynum[0], &ptmxfd[0]); + ret = c->console_getfd(c, &ttynum[0], &masterfd[0]); if (ret < 0) { TSTERR("console allocate failed"); goto err1; @@ -79,12 +79,12 @@ static int test_console_running_container(struct lxc_container *c) } /* attempt to alloc same ttynum */ - ret = c->console_getfd(c, &ttynum[0], &ptmxfd[1]); + ret = c->console_getfd(c, &ttynum[0], &masterfd[1]); if (ret != -1) { TSTERR("console allocate should fail for allocated ttynum %d", ttynum[0]); goto err2; } - close(ptmxfd[0]); ptmxfd[0] = -1; + close(masterfd[0]); masterfd[0] = -1; close(ttyfd[0]); ttyfd[0] = -1; /* ensure we can allocate all consoles, we do this a few times to @@ -92,7 +92,7 @@ 
static int test_console_running_container(struct lxc_container *c) */ for (i = 0; i < 10; i++) { for (nrconsoles = 0; nrconsoles < MAXCONSOLES; nrconsoles++) { - ret = c->console_getfd(c, &ttynum[nrconsoles], &ptmxfd[nrconsoles]); + ret = c->console_getfd(c, &ttynum[nrconsoles], &masterfd[nrconsoles]); if (ret < 0) break; ttyfd[nrconsoles] = ret; @@ -103,13 +103,13 @@ static int test_console_running_container(struct lxc_container *c) goto err2; } - test_console_close_all(ttyfd, ptmxfd); + test_console_close_all(ttyfd, masterfd); } ret = 0; err2: - test_console_close_all(ttyfd, ptmxfd); + test_console_close_all(ttyfd, masterfd); err1: return ret; diff --git a/src/tests/containertests.c b/src/tests/containertests.c index 0fb6fbdfb..b28bcd56d 100644 --- a/src/tests/containertests.c +++ b/src/tests/containertests.c @@ -135,7 +135,7 @@ int main(int argc, char *argv[]) str = c->config_file_name(c); #define CONFIGFNAM LXCPATH "/" MYNAME "/config" - if (str && strcmp(str, CONFIGFNAM)) { + if (!str || strcmp(str, CONFIGFNAM)) { fprintf(stderr, "%d: got wrong config file name (%s, not %s)\n", __LINE__, str, CONFIGFNAM); goto out; } diff --git a/src/tests/lxc-test-no-new-privs b/src/tests/lxc-test-no-new-privs index cfcb43bd6..8642992dd 100755 --- a/src/tests/lxc-test-no-new-privs +++ b/src/tests/lxc-test-no-new-privs @@ -36,13 +36,11 @@ cleanup() { trap cleanup EXIT SIGHUP SIGINT SIGTERM -if [ ! -d /etc/lxc ]; then - mkdir -p /etc/lxc/ - cat > /etc/lxc/default.conf << EOF +mkdir -p /etc/lxc/ +cat > /etc/lxc/default.conf << EOF lxc.net.0.type = veth lxc.net.0.link = lxcbr0 EOF -fi ARCH=i386 if type dpkg >/dev/null 2>&1; then diff --git a/src/tests/lxc-test-usernsexec b/src/tests/lxc-test-usernsexec deleted file mode 100755 index 0ee48b353..000000000 --- a/src/tests/lxc-test-usernsexec +++ /dev/null @@ -1,368 +0,0 @@ -#!/bin/bash -# -# This is a bash test case to test lxc-usernsexec. 
-# It basically supports usring lxc-usernsexec to execute itself -# and then create files and check that their ownership is as expected. -# -# It requires that the current user has at least 1 value in subuid and /etc/subgid -TEMP_D="" -VERBOSITY=0 -set -f - -fail() { echo "$@" 1>&2; exit 1; } -error() { echo "$@" 1>&2; } -skip() { - error "SKIP:" "$@" - exit 0 -} -debug() { - local level=${1}; shift; - [ "${level}" -gt "${VERBOSITY}" ] && return - error "${@}" -} - -collect_owners() { - # collect_owners([--dir=dir], file1, file2 ...) - # set _RET to a space delimited array of - # :owner:group :owner:group ... - local out="" ret="" dir="" - if [ "${1#--dir=}" != "$1" ]; then - dir="${1#--dir=}" - shift - fi - for arg in "$@"; do - # drop the :* so that input can be same as touch_files. - out=$(stat --format "%n:%u:%g" "${dir}${arg}") || { - error "failed to stat ${arg}" - return 1; - } - ret="$ret ${out##*/}" - done - _RET="${ret# }" -} - -cleanup() { - if [ -d "$TEMP_D" ]; then - rm -Rf "$TEMP_D" - fi -} - -touch_files() { - # touch_files tok [tok ...] - # tok is filename:chown_id:chown_gid - # if chown_id or chown_gid is empty, then chown will do the right thing - # and only change the provided value. - local args="" tok="" fname="" uidgid="" - args=( "$@" ) - for tok in "$@"; do - fname=${tok%%:*} - uidgid=${tok#$fname} - uidgid=${uidgid#:} - : > "$fname" || { error "failed to create $fname"; return 1; } - [ -z "$uidgid" ] && continue - chown $uidgid "$fname" || { error "failed to chmod '$uidgid' $fname ($?)"; return 1; } - done -} - -inside_cleanup() { - local f="" - rm -f "${FILES[@]}" - echo "$STATUS" >&5 - echo "$STATUS" >&6 -} - -set_files() { - local x="" - FILES=( ) - for x in "$@"; do - FILES[${#FILES[@]}]="${x%%:*}" - done -} - -inside() { - # this what gets run inside the usernsexec environment. 
- # basically expects arguments of :uid:gid - # it will create the file, and then chmod it to the provided uid:gid - # it writes to file descriptor 5 a single line with space delimited - # exit_value uid gid [:: ... ] - STATUS=127 - trap inside_cleanup EXIT - local uid="" gid="" x="" - - uid=$(id -u) || fail "failed execution of id -u" - gid=$(id -g) || fail "failed execution of id -g" - - set_files "$@" - - touch_files "$@" || fail "failed to create files" - - collect_owners "${FILES[@]}" || fail "failed to collect owners" - result="$_RET" - - # tell caller we are done. - echo "0" "$uid" "$gid" "$result" >&5 - STATUS=0 - - # let the caller do things while the files are around. - read -t 30 x <&6 - - exit -} - -runtest() { - # runtest(mydir, nsexec_args, [inside [...]]) - # - use 'mydir' as a working dir. - # - execute lxc-usernsexec $nsexec_args -- inside - # - # write to stdout - # exit_value inside_exit_value inside_uid:inside_gid - # - # where results are a list of space separated - # filename:uid:gid - # for each file passed in inside_args - [ $# -ge 3 ] || { error "runtest expects 2 args"; return 1; } - local mydir="$1" nsexec_args="$2" - shift 2 - local ret inside_owners t="" - KIDPID="" - - mkfifo "${mydir}/5" && exec 5<>"${mydir}/5" || return - mkfifo "${mydir}/6" && exec 6<>"${mydir}/6" || return - mkdir --mode=777 "${mydir}/work" || return - cd "${mydir}/work" - - set_files "$@" - - local results="" oresults="" iresults="" iuid="" igid="" n=0 - - error "$" $USERNSEXEC ${nsexec_args} -- "$MYPATH" inside "$*" - ${USERNSEXEC} ${nsexec_args} -- "$MYPATH" inside "$@" & - KIDPID=$! - - [ -d "/proc/$KIDPID" ] || { - wait $KIDPID - fail "kid $KIDPID died quickly $?" - } - - # if lxc-usernsexec fails to execute MYPATH inside, then - # the read below would timeout. To avoid a long timeout, - # we do a short timeout and check the pid is alive. - while ! read -t 1 ret iuid igid inside_owners <&5; do - n=$((n+1)) - if [ ! 
-d "/proc/$KIDPID" ]; then - wait $KIDPID - fail "kid $KIDPID is gone $?" - fi - [ $n -ge 30 ] && fail "child never wrote to pipe" - done - iresults=( $inside_owners ) - - collect_owners "--dir=${mydir}/work/" "${FILES[@]}" || return - oresults=( $_RET ) - - echo 0 >&6 - wait - - ret=$? - - results=( ) - for((i=0;i<${#iresults[@]};i++)); do - results[$i]="${oresults[$i]}:${iresults[$i]#*:}" - done - - echo 0 $ret "$iuid:$igid" "${results[@]}" -} - -runcheck() { - local name="$1" expected="$2" nsexec_args="$3" found="" - shift 3 - mkdir "${TEMP_D}/$name" || fail "failed mkdir /$name.d" - local err="${TEMP_D}/$name.err" - out=$("$MYPATH" runtest "${TEMP_D}/$name" "$nsexec_args" "$@" 2>"$err") || { - error "$name: FAIL - runtest failed $?" - [ -n "$out" ] && error " $out" - sed 's,^, ,' "$err" 1>&2 - ERRORS="${ERRORS} $name" - return 1 - } - set -- $out - local parentrc=$1 kidrc=$2 iuidgid="$3" found="" - shift 3 - found="$*" - [ "$parentrc" = "0" -a "$kidrc" = "0" ] || { - error "$name: FAIL - parentrc=$parentrc kidrc=$kidrc found=$found" - ERRORS="${ERRORS} $name" - return 1 - } - [ "$expected" = "$found" ] && { - error "$name: PASS" - PASS="${PASSES} $name" - return 0 - } - echo "$name: FAIL expected '$expected' != found '$found'" - FAILS="${FAILS} $name" - return 1 -} - -setup_Usage() { - cat <> /etc/subuid || { - error "failed to add $asuser to /etc/subuid" - } - fi - - subgid=$(awk -F: '$1 == n { print $2; exit(0); }' "n=$asuser" /etc/subgid) || { - error "failed to read /etc/subgid for $asuser" - return 1 - } - if [ -n "$subgid" ]; then - debug 1 "$asuser already had subgid=$subgid" - else - debug 1 "adding $asuser:$create_subgid to /etc/subgid" - echo "$asuser:$create_subgid" >> /etc/subgid || { - error "failed to add $asuser to /etc/subgid" - } - fi - - debug 0 "as $asuser executing ${MYPATH} ${pt_args[*]}" - sudo -Hu "$asuser" "${MYPATH}" "${pt_args[@]}" -} - -USERNSEXEC=${USERNSEXEC:-lxc-usernsexec} -MYPATH=$(readlink -f "$0") || { echo "failed to get full 
path to self: $0"; exit 1; } -export MYPATH - -if [ "$1" = "inside" ]; then - shift - inside "$@" - exit -elif [ "$1" = "runtest" ]; then - shift - runtest "$@" - exit -elif [ "$1" = "setup_and_run" ]; then - shift - setup_and_run "$@" - exit -fi - -name=$(id --user --name) || fail "failed to get username" -if [ "$name" = "root" ]; then - setup_and_run "$@" - exit -fi - -subuid=$(awk -F: '$1 == n { print $2; exit(0); }' "n=$name" /etc/subuid) && - [ -n "$subuid" ] || fail "did not find $name in /etc/subuid" - -subgid=$(awk -F: '$1 == n { print $2; exit(0); }' "n=$name" /etc/subgid) && - [ -n "$subgid" ] || fail "did not find $name in /etc/subgid" - - -uid=$(id --user) || fail "failed to get uid" -gid=$(id --group) || fail "failed to get gid" - -mapuid="u:0:$uid:1" -mapgid="g:0:$gid:1" - -ver=$(dpkg-query --show lxc-utils | awk '{print $2}') -error "uid=$uid gid=$gid name=$name subuid=$subuid subgid=$subgid ver=$ver" -error "lxc-utils=$ver kver=$(uname -r)" -error "USERNSEXEC=$USERNSEXEC" - -TEMP_D=$(mktemp -d) -trap cleanup EXIT - -PASSES=""; FAILS=""; ERRORS="" -runcheck nouidgid "f0:$subuid:$subgid:0:0" "" f0 - -runcheck myuidgid "f0:$uid:$gid:0:0" \ - "-m$mapuid -m$mapgid" f0 - -runcheck subuidgid \ - "f0:$subuid:$subgid:0:0" \ - "-mu:0:$subuid:1 -mg:0:$subgid:1" f0:0:0 - -runcheck bothsets "f0:$uid:$gid:0:0 f1:$subuid:$subgid:1:1 f2:$uid:$subgid:0:1" \ - "-m$mapuid -m$mapgid -mu:1:$subuid:1 -mg:1:$subgid:1" \ - f0 f1:1:1 f2::1 - -runcheck mismatch "f0:$uid:$subgid:0:0 f1:$subuid:$gid:15:31" \ - "-mu:0:$uid:1 -mg:0:$subgid:1 -mu:15:$subuid:1 -mg:31:$gid:1" \ - f0 f1:15:31 - -FAILS=${FAILS# } -ERRORS=${ERRORS# } -PASSES=${PASSES# } - -[ -z "${FAILS}" ] || error "FAILS: ${FAILS}" -[ -z "${ERRORS}" ] || error "ERRORS: ${ERRORS}" -[ -z "${FAILS}" -a -z "${ERRORS}" ] || exit 1 -exit 0 diff --git a/src/tests/lxc_raw_clone.c b/src/tests/lxc_raw_clone.c index f72e20ccc..655454f39 100644 --- a/src/tests/lxc_raw_clone.c +++ b/src/tests/lxc_raw_clone.c @@ -39,7 +39,7 @@ 
#include "lxctest.h" #include "namespace.h" -#include "process_utils.h" +#include "raw_syscalls.h" #include "utils.h" int main(int argc, char *argv[]) diff --git a/templates/lxc-oci.in b/templates/lxc-oci.in index dab077191..8017c38c1 100644 --- a/templates/lxc-oci.in +++ b/templates/lxc-oci.in @@ -348,7 +348,8 @@ fi # shellcheck disable=SC2039 # shellcheck disable=SC2068 umoci --log=error unpack ${umoci_args[@]} --image "${DOWNLOAD_TEMP}:latest" "${LXC_ROOTFS}.tmp" -find "${LXC_ROOTFS}.tmp/rootfs" -mindepth 1 -maxdepth 1 -exec mv '{}' "${LXC_ROOTFS}/" \; +rmdir "${LXC_ROOTFS}" +mv "${LXC_ROOTFS}.tmp/rootfs" "${LXC_ROOTFS}" OCI_CONF_FILE=$(getconfigpath "${DOWNLOAD_TEMP}" latest) LXC_CONF_FILE="${LXC_PATH}/config" -- 2.25.1