From 327e83ff13bec4bf1fa80ede9515d3f9531d7d1f Mon Sep 17 00:00:00 2001 From: wujing Date: Wed, 15 Apr 2020 06:37:43 -0400 Subject: [PATCH 35/49] Seccomp security feature enhanced Signed-off-by: wujing --- src/lxc/conf.c | 3 +- src/lxc/seccomp.c | 548 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 548 insertions(+), 3 deletions(-) diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 6856b1d..e8ee749 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -4730,8 +4730,7 @@ int lxc_setup(struct lxc_handler *handler) // isulad: create link /etc/mtab for /proc/mounts if (create_mtab_link() != 0) { - ERROR("failed to create link /etc/mtab for target /proc/mounts"); - goto on_error; + return log_error(-1, "failed to create link /etc/mtab for target /proc/mounts"); } #endif diff --git a/src/lxc/seccomp.c b/src/lxc/seccomp.c index 081d315..a75adb7 100644 --- a/src/lxc/seccomp.c +++ b/src/lxc/seccomp.c @@ -295,7 +295,11 @@ on_error: #endif #if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH +#ifdef HAVE_ISULAD +enum lxc_arch_t { +#else enum lxc_hostarch_t { +#endif lxc_seccomp_arch_all = 0, lxc_seccomp_arch_native, lxc_seccomp_arch_i386, @@ -351,8 +355,13 @@ int get_hostarch(void) return lxc_seccomp_arch_unknown; } +#ifdef HAVE_ISULAD +scmp_filter_ctx get_new_ctx(enum lxc_arch_t n_arch, + uint32_t default_policy_action, uint32_t *architectures) +#else scmp_filter_ctx get_new_ctx(enum lxc_hostarch_t n_arch, uint32_t default_policy_action, bool *needs_merge) +#endif { int ret; uint32_t arch; @@ -475,10 +484,17 @@ scmp_filter_ctx get_new_ctx(enum lxc_hostarch_t n_arch, return NULL; } TRACE("Removed native arch from main seccomp context"); - +#ifdef HAVE_ISULAD + *architectures = arch; +#else *needs_merge = true; +#endif } else { +#ifdef HAVE_ISULAD + *architectures = SCMP_ARCH_NATIVE; +#else *needs_merge = false; +#endif TRACE("Arch %d already present in main seccomp context", (int)n_arch); } @@ -510,7 +526,11 @@ bool do_resolve_add_rule(uint32_t arch, char 
#ifdef HAVE_ISULAD
/* Number of compat-context slots tracked while parsing a v2 profile. */
#define SCMP_ARCH_INDEX_MAX 3

/*
 * Per-slot bookkeeping for the compat seccomp contexts. The four arrays are
 * parallel: entry i of each array describes the same slot.
 *
 * architectures - value written by get_new_ctx() through its out-parameter
 * lxc_arch      - lxc architecture id the slot was created for
 * contexts      - seccomp filter context of the slot
 * needs_merge   - set once a rule has been added to the slot's context
 */
struct scmp_ctx_info {
	uint32_t architectures[SCMP_ARCH_INDEX_MAX];
	enum lxc_arch_t lxc_arch[SCMP_ARCH_INDEX_MAX];
	scmp_filter_ctx contexts[SCMP_ARCH_INDEX_MAX];
	bool needs_merge[SCMP_ARCH_INDEX_MAX];
};

/*
 * Find the slot whose lxc_arch entry equals @arch.
 *
 * Returns the index of the first matching slot, or -1 when no slot matches.
 */
static int get_arch_index(enum lxc_arch_t arch, struct scmp_ctx_info *ctx)
{
	int slot = 0;

	while (slot < SCMP_ARCH_INDEX_MAX) {
		if (arch == ctx->lxc_arch[slot])
			return slot;

		slot++;
	}

	return -1;
}
#endif
#ifdef HAVE_ISULAD
/* Release every compat context still held in @ctx and clear its slot. */
static void scmp_ctx_release_all(struct scmp_ctx_info *ctx)
{
	int i;

	for (i = 0; i < SCMP_ARCH_INDEX_MAX; i++) {
		if (!ctx->contexts[i])
			continue;

		seccomp_release(ctx->contexts[i]);
		ctx->contexts[i] = NULL;
	}
}

/*
 * Record @arch in slot @idx and create its compat context via get_new_ctx().
 * Returns false when get_new_ctx() did not return a context.
 */
static bool scmp_ctx_init_slot(struct scmp_ctx_info *ctx, int idx,
			       enum lxc_arch_t arch,
			       uint32_t default_policy_action)
{
	ctx->lxc_arch[idx] = arch;
	ctx->contexts[idx] = get_new_ctx(arch, default_policy_action,
					 &ctx->architectures[idx]);

	return ctx->contexts[idx] != NULL;
}

/*
 * Add @rule for the syscall named in @line to the context in slot @idx and
 * flag the slot for merging. @kind is only used in the log message.
 * Returns false when do_resolve_add_rule() reports failure.
 */
static bool scmp_ctx_add_rule(struct scmp_ctx_info *ctx, int idx, char *line,
			      struct seccomp_v2_rule *rule, const char *kind)
{
	if (!do_resolve_add_rule(ctx->architectures[idx], line,
				 ctx->contexts[idx], rule))
		return false;

	INFO("Added %s rule for arch %d for %s action %d(%s)", kind,
	     ctx->architectures[idx], line, rule->action,
	     get_action_name(rule->action));
	ctx->needs_merge[idx] = true;

	return true;
}

/*
 * Merge the context in slot @idx into the main context of @conf, or release
 * it when no rule was ever added to it. Empty slots are ignored.
 * Returns 0 on success and -1 when seccomp_merge() fails; in the failure case
 * the slot keeps its context so the caller's cleanup can release it.
 */
static int scmp_ctx_merge_slot(struct lxc_conf *conf,
			       struct scmp_ctx_info *ctx, int idx)
{
	static const char *const ordinal[SCMP_ARCH_INDEX_MAX] = {
		"first", "second", "third"
	};
	int ret;

	if (!ctx->contexts[idx])
		return 0;

	if (!ctx->needs_merge[idx]) {
		seccomp_release(ctx->contexts[idx]);
		ctx->contexts[idx] = NULL;
		return 0;
	}

	ret = seccomp_merge(conf->seccomp.seccomp_ctx, ctx->contexts[idx]);
	if (ret < 0) {
		ERROR("%s - Failed to merge %s compat seccomp "
		      "context into main context", strerror(-ret), ordinal[idx]);
		return -1;
	}

	/*
	 * On success libseccomp has released the source filter. Drop our
	 * reference so a later failure does not release it a second time.
	 */
	ctx->contexts[idx] = NULL;
	TRACE("Merged %s compat seccomp context into main context", ordinal[idx]);

	return 0;
}

/*
 * Translate a "[arch]" section header into the architecture the following
 * rules apply to.
 *
 * On a recognized header, *out is set to the section's architecture, or to
 * lxc_seccomp_arch_unknown when the section does not apply on @native_arch,
 * and 0 is returned. On an unrecognized header *out is left untouched and -1
 * is returned.
 */
static int lookup_section_arch(const char *header, enum lxc_arch_t native_arch,
			       enum lxc_arch_t *out)
{
	/* A section applies only when the host arch is host_a or host_b. */
	static const struct {
		const char *lower;
		const char *upper;
		enum lxc_arch_t rule_arch;
		enum lxc_arch_t host_a;
		enum lxc_arch_t host_b;
	} sections[] = {
		{ "[x86]", "[X86]", lxc_seccomp_arch_i386,
		  lxc_seccomp_arch_i386, lxc_seccomp_arch_amd64 },
		{ "[x32]", "[X32]", lxc_seccomp_arch_x32,
		  lxc_seccomp_arch_amd64, lxc_seccomp_arch_amd64 },
		{ "[x86_64]", "[X86_64]", lxc_seccomp_arch_amd64,
		  lxc_seccomp_arch_amd64, lxc_seccomp_arch_amd64 },
#ifdef SCMP_ARCH_ARM
		{ "[arm]", "[ARM]", lxc_seccomp_arch_arm,
		  lxc_seccomp_arch_arm, lxc_seccomp_arch_arm64 },
#endif
#ifdef SCMP_ARCH_AARCH64
		{ "[arm64]", "[ARM64]", lxc_seccomp_arch_arm64,
		  lxc_seccomp_arch_arm64, lxc_seccomp_arch_arm64 },
#endif
#ifdef SCMP_ARCH_PPC64LE
		{ "[ppc64le]", "[PPC64LE]", lxc_seccomp_arch_ppc64le,
		  lxc_seccomp_arch_ppc64le, lxc_seccomp_arch_ppc64le },
#endif
#ifdef SCMP_ARCH_PPC64
		{ "[ppc64]", "[PPC64]", lxc_seccomp_arch_ppc64,
		  lxc_seccomp_arch_ppc64, lxc_seccomp_arch_ppc64 },
#endif
#ifdef SCMP_ARCH_PPC
		{ "[ppc]", "[PPC]", lxc_seccomp_arch_ppc,
		  lxc_seccomp_arch_ppc, lxc_seccomp_arch_ppc64 },
#endif
#ifdef SCMP_ARCH_MIPS
		{ "[mips64]", "[MIPS64]", lxc_seccomp_arch_mips64,
		  lxc_seccomp_arch_mips64, lxc_seccomp_arch_mips64 },
		{ "[mips64n32]", "[MIPS64N32]", lxc_seccomp_arch_mips64n32,
		  lxc_seccomp_arch_mips64, lxc_seccomp_arch_mips64 },
		{ "[mips]", "[MIPS]", lxc_seccomp_arch_mips,
		  lxc_seccomp_arch_mips, lxc_seccomp_arch_mips64 },
		{ "[mipsel64]", "[MIPSEL64]", lxc_seccomp_arch_mipsel64,
		  lxc_seccomp_arch_mipsel64, lxc_seccomp_arch_mipsel64 },
		{ "[mipsel64n32]", "[MIPSEL64N32]", lxc_seccomp_arch_mipsel64n32,
		  lxc_seccomp_arch_mipsel64, lxc_seccomp_arch_mipsel64 },
		{ "[mipsel]", "[MIPSEL]", lxc_seccomp_arch_mipsel,
		  lxc_seccomp_arch_mipsel, lxc_seccomp_arch_mipsel64 },
#endif
#ifdef SCMP_ARCH_S390X
		{ "[s390x]", "[S390X]", lxc_seccomp_arch_s390x,
		  lxc_seccomp_arch_s390x, lxc_seccomp_arch_s390x },
#endif
	};
	size_t i;

	/* "[all]" is accepted on every host. */
	if (strcmp(header, "[all]") == 0 || strcmp(header, "[ALL]") == 0) {
		*out = lxc_seccomp_arch_all;
		return 0;
	}

	for (i = 0; i < sizeof(sections) / sizeof(sections[0]); i++) {
		if (strcmp(header, sections[i].lower) != 0 &&
		    strcmp(header, sections[i].upper) != 0)
			continue;

		if (native_arch == sections[i].host_a ||
		    native_arch == sections[i].host_b)
			*out = sections[i].rule_arch;
		else
			*out = lxc_seccomp_arch_unknown;

		return 0;
	}

	return -1;
}

/*
 * Parse a version 2 seccomp profile (iSulad variant).
 *
 * @line holds the policy line ("blacklist" or "whitelist", optionally followed
 * by a default action); the remaining lines are read from @f with getline()
 * into @line / @line_bufsz. Lines are either "[arch]" section headers or
 * syscall rules. Rules are added to the main context in
 * conf->seccomp.seccomp_ctx and/or to per-architecture compat contexts, which
 * are merged into the main context once the whole file has been read.
 *
 * Returns 0 on success and -1 on failure.
 *
 * Ownership of @line: it is freed on success and on every failure path that
 * goes through the "bad" label. The plain "return -1" paths do not free it;
 * that matches the original code.
 * NOTE(review): confirm against the caller which of the two is expected.
 */
static int parse_config_v2(FILE *f, char *line, size_t *line_bufsz, struct lxc_conf *conf)
{
	int ret, i;
	char *p;
	enum lxc_arch_t cur_rule_arch, native_arch;
	bool blacklist = false;
	uint32_t default_policy_action = (uint32_t)-1, default_rule_action;
	struct seccomp_v2_rule rule;
	struct scmp_ctx_info ctx;

	if (strncmp(line, "blacklist", 9) == 0) {
		blacklist = true;
	} else if (strncmp(line, "whitelist", 9) != 0) {
		ERROR("Bad seccomp policy style \"%s\"", line);
		return -1;
	}

	p = strchr(line, ' ');
	if (p) {
		default_policy_action = get_v2_default_action(p + 1);
		if (default_policy_action == (uint32_t)-2)
			return -1;
	}

	/* for blacklist, allow any syscall which has no rule */
	if (blacklist) {
		if (default_policy_action == (uint32_t)-1)
			default_policy_action = SCMP_ACT_ALLOW;

		default_rule_action = SCMP_ACT_KILL;
	} else {
		if (default_policy_action == (uint32_t)-1)
			default_policy_action = SCMP_ACT_KILL;

		default_rule_action = SCMP_ACT_ALLOW;
	}

	memset(&ctx, 0, sizeof(ctx));
	for (i = 0; i < SCMP_ARCH_INDEX_MAX; i++)
		ctx.architectures[i] = SCMP_ARCH_NATIVE;

	native_arch = get_hostarch();
	cur_rule_arch = native_arch;

	/*
	 * On hosts that can run several ABIs, create one compat context per
	 * ABI and let rules apply to all of them until a section header says
	 * otherwise.
	 */
	if (native_arch == lxc_seccomp_arch_amd64) {
		cur_rule_arch = lxc_seccomp_arch_all;

		if (!scmp_ctx_init_slot(&ctx, 0, lxc_seccomp_arch_i386, default_policy_action) ||
		    !scmp_ctx_init_slot(&ctx, 1, lxc_seccomp_arch_x32, default_policy_action) ||
		    !scmp_ctx_init_slot(&ctx, 2, lxc_seccomp_arch_amd64, default_policy_action))
			goto bad;
#ifdef SCMP_ARCH_PPC
	} else if (native_arch == lxc_seccomp_arch_ppc64) {
		cur_rule_arch = lxc_seccomp_arch_all;

		if (!scmp_ctx_init_slot(&ctx, 0, lxc_seccomp_arch_ppc, default_policy_action) ||
		    !scmp_ctx_init_slot(&ctx, 1, lxc_seccomp_arch_ppc64, default_policy_action))
			goto bad;
#endif
#ifdef SCMP_ARCH_ARM
	} else if (native_arch == lxc_seccomp_arch_arm64) {
		cur_rule_arch = lxc_seccomp_arch_all;

		if (!scmp_ctx_init_slot(&ctx, 0, lxc_seccomp_arch_arm, default_policy_action))
			goto bad;

#ifdef SCMP_ARCH_AARCH64
		if (!scmp_ctx_init_slot(&ctx, 1, lxc_seccomp_arch_arm64, default_policy_action))
			goto bad;
#endif
#endif
#ifdef SCMP_ARCH_MIPS
	} else if (native_arch == lxc_seccomp_arch_mips64) {
		cur_rule_arch = lxc_seccomp_arch_all;

		if (!scmp_ctx_init_slot(&ctx, 0, lxc_seccomp_arch_mips, default_policy_action) ||
		    !scmp_ctx_init_slot(&ctx, 1, lxc_seccomp_arch_mips64n32, default_policy_action) ||
		    !scmp_ctx_init_slot(&ctx, 2, lxc_seccomp_arch_mips64, default_policy_action))
			goto bad;
	} else if (native_arch == lxc_seccomp_arch_mipsel64) {
		cur_rule_arch = lxc_seccomp_arch_all;

		if (!scmp_ctx_init_slot(&ctx, 0, lxc_seccomp_arch_mipsel, default_policy_action) ||
		    !scmp_ctx_init_slot(&ctx, 1, lxc_seccomp_arch_mipsel64n32, default_policy_action) ||
		    !scmp_ctx_init_slot(&ctx, 2, lxc_seccomp_arch_mipsel64, default_policy_action))
			goto bad;
#endif
	}

	if (default_policy_action != SCMP_ACT_KILL) {
		ret = seccomp_reset(conf->seccomp.seccomp_ctx, default_policy_action);
		if (ret != 0) {
			ERROR("Error re-initializing Seccomp");
			/* Do not leak the compat contexts created above. */
			scmp_ctx_release_all(&ctx);
			return -1;
		}

		ret = seccomp_attr_set(conf->seccomp.seccomp_ctx, SCMP_FLTATR_CTL_NNP, 0);
		if (ret < 0) {
			errno = -ret;
			SYSERROR("Failed to turn off no-new-privs");
			/* Do not leak the compat contexts created above. */
			scmp_ctx_release_all(&ctx);
			return -1;
		}

#ifdef SCMP_FLTATR_ATL_TSKIP
		ret = seccomp_attr_set(conf->seccomp.seccomp_ctx, SCMP_FLTATR_ATL_TSKIP, 1);
		if (ret < 0) {
			errno = -ret;
			SYSWARN("Failed to turn on seccomp nop-skip, continuing");
		}
#endif
	}

	while (getline(&line, line_bufsz, f) != -1) {
		if (line[0] == '#')
			continue;

		/*
		 * NOTE(review): this runs before the trailing newline is
		 * stripped, so a line consisting only of "\n" is not skipped
		 * here - confirm whether that is intended.
		 */
		if (line[0] == '\0')
			continue;

		remove_trailing_newlines(line);

		INFO("Processing \"%s\"", line);
		if (line[0] == '[') {
			/* Read the architecture for next set of rules. */
			if (lookup_section_arch(line, native_arch, &cur_rule_arch) < 0)
				goto bad_arch;

			continue;
		}

		/* irrelevant arch - i.e. arm on i386 */
		if (cur_rule_arch == lxc_seccomp_arch_unknown)
			continue;

		memset(&rule, 0, sizeof(rule));
		/* read optional action which follows the syscall */
		ret = parse_v2_rules(line, default_rule_action, &rule);
		if (ret != 0) {
			ERROR("Failed to interpret seccomp rule");
			goto bad_rule;
		}

		if (cur_rule_arch == native_arch) {
			/* add for native arch */
			if (!do_resolve_add_rule(SCMP_ARCH_NATIVE, line,
						 conf->seccomp.seccomp_ctx, &rule))
				goto bad_rule;

			INFO("Added native rule for arch %d for %s action %d(%s)",
			     SCMP_ARCH_NATIVE, line, rule.action,
			     get_action_name(rule.action));
		} else if (cur_rule_arch != lxc_seccomp_arch_all) {
			/* add for compat specified arch */
			int arch_index = get_arch_index(cur_rule_arch, &ctx);
			if (arch_index < 0)
				goto bad_arch;

			if (!scmp_ctx_add_rule(&ctx, arch_index, line, &rule, "compat"))
				goto bad_rule;
		} else {
			/* add for all compat archs */
			if (!do_resolve_add_rule(SCMP_ARCH_NATIVE, line,
						 conf->seccomp.seccomp_ctx, &rule))
				goto bad_rule;

			INFO("Added native rule for arch %d for %s action %d(%s)",
			     SCMP_ARCH_NATIVE, line, rule.action,
			     get_action_name(rule.action));

			for (i = 0; i < SCMP_ARCH_INDEX_MAX; i++) {
				/* Slot was not given its own architecture. */
				if (ctx.architectures[i] == SCMP_ARCH_NATIVE)
					continue;

				/*
				 * The last slot is only filled with the host's
				 * own arch, hence the "native" wording that the
				 * log message has always used for it.
				 */
				if (!scmp_ctx_add_rule(&ctx, i, line, &rule,
						       i == SCMP_ARCH_INDEX_MAX - 1 ? "native" : "compat"))
					goto bad_rule;
			}
		}
	}

	INFO("Merging compat seccomp contexts into main context");
	for (i = 0; i < SCMP_ARCH_INDEX_MAX; i++) {
		if (scmp_ctx_merge_slot(conf, &ctx, i) < 0)
			goto bad;
	}

	free(line);
	return 0;

bad_arch:
	ERROR("Unsupported architecture \"%s\"", line);

bad_rule:
bad:
	/* Only contexts not yet merged or released are still referenced. */
	scmp_ctx_release_all(&ctx);

	free(line);

	return -1;
}
#endif /* HAVE_ISULAD */