From 9e139dac0cbfc7252b672a23cfc54d62e1989223 Mon Sep 17 00:00:00 2001 From: LiFeng Date: Mon, 14 Jan 2019 21:38:07 -0500 Subject: [PATCH 029/131] add start timeout to limit start time Signed-off-by: LiFeng --- src/lxc/conf.c | 6 +-- src/lxc/execute.c | 4 +- src/lxc/lxc.h | 8 +-- src/lxc/lxccontainer.c | 21 +++++++- src/lxc/lxccontainer.h | 16 ++++++ src/lxc/start.c | 106 ++++++++++++++++++++++++++++++++++++-- src/lxc/start.h | 6 +-- src/lxc/tools/arguments.h | 2 + src/lxc/tools/lxc_start.c | 26 ++++++++++ 9 files changed, 175 insertions(+), 20 deletions(-) diff --git a/src/lxc/conf.c b/src/lxc/conf.c index f429491e..439353b2 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -4381,14 +4381,12 @@ void* wait_ocihook_timeout(void *arg) if (alive) { ERROR("%s:%d: running %s hook caused \"hook ran past specified timeout of %.1fs\"", - __FILE__, __LINE__, - (conf->which == LXCHOOK_START_HOST) ? "prestart" : lxchook_names[conf->which], + __FILE__, __LINE__, lxchook_names[conf->which], (double)conf->timeout); if (conf->errfd >= 0) { lxc_write_error_message(conf->errfd, "%s:%d: running %s hook caused \"hook ran past specified timeout of %.1fs\"", - __FILE__, __LINE__, - (conf->which == LXCHOOK_START_HOST) ? "prestart" : lxchook_names[conf->which], + __FILE__, __LINE__, lxchook_names[conf->which], (double)conf->timeout); } diff --git a/src/lxc/execute.c b/src/lxc/execute.c index 45ca67e3..d388e633 100644 --- a/src/lxc/execute.c +++ b/src/lxc/execute.c @@ -111,12 +111,12 @@ static struct lxc_operations execute_start_ops = { int lxc_execute(const char *name, char *const argv[], int quiet, struct lxc_handler *handler, const char *lxcpath, - bool daemonize, int *error_num) + bool daemonize, int *error_num, unsigned int start_timeout) { struct execute_args args = {.argv = argv, .quiet = quiet}; TRACE("Doing lxc_execute"); handler->conf->is_execute = true; return __lxc_start(name, handler, &execute_start_ops, &args, lxcpath, - daemonize, error_num); + daemonize, error_num, start_timeout); } diff --git a/src/lxc/lxc.h b/src/lxc/lxc.h index 22e35096..687b4b28 100644 --- a/src/lxc/lxc.h +++ b/src/lxc/lxc.h @@ -55,7 +55,7 @@ struct lxc_handler; */ extern int lxc_start(const char *name, char *const argv[], struct lxc_handler *handler, const char *lxcpath, - bool daemonize, int *error_num); + bool daemonize, int *error_num, unsigned int start_timeout); /* * Start the specified command inside an application container @@ -66,9 +66,9 @@ extern int lxc_start(const char *name, char *const argv[], * @daemonize : whether or not the container is daemonized * Returns 0 on success, < 0 otherwise */ -extern int lxc_execute(const char *name, char *const argv[], int quiet, - struct lxc_handler *handler, const char *lxcpath, - bool daemonize, int *error_num); +int lxc_execute(const char *name, char *const argv[], int quiet, + struct lxc_handler *handler, const char *lxcpath, + bool daemonize, int *error_num, unsigned int start_timeout); /* * Close the fd associated with the monitoring diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c index e6272fcd..beae459b 100644 --- a/src/lxc/lxccontainer.c +++ b/src/lxc/lxccontainer.c @@ -1169,10 +1169,10 @@ reboot: if (useinit) ret = lxc_execute(c->name, argv, 1, handler, c->config_path, - c->daemonize, &c->error_num); + c->daemonize, &c->error_num, c->start_timeout); else ret = lxc_start(c->name, argv, handler, c->config_path, - c->daemonize, &c->error_num); + c->daemonize, &c->error_num, c->start_timeout); if (conf->reboot == REBOOT_REQ) { INFO("Container requested reboot"); @@ -5100,6 +5100,22 @@ static bool do_lxcapi_set_container_info_file(struct lxc_container *c, const cha WRAP_API_1(bool, lxcapi_set_container_info_file, const char *) +/* isulad add start timeout */ +static bool do_lxcapi_set_start_timeout(struct lxc_container *c, unsigned int start_timeout) +{ + if (!c || !c->lxc_conf) + return false; + if (container_mem_lock(c)) { + ERROR("Error getting mem lock"); + return false; + } + c->start_timeout = start_timeout; + container_mem_unlock(c); + return true; +} + +WRAP_API_1(bool, lxcapi_set_start_timeout, unsigned int) + struct lxc_container *lxc_container_new(const char *name, const char *configpath) { struct lxc_container *c; @@ -5231,6 +5247,7 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath /* isulad add begin */ c->set_terminal_init_fifos = lxcapi_set_terminal_default_fifos; c->set_container_info_file = lxcapi_set_container_info_file; + c->set_start_timeout = lxcapi_set_start_timeout; /* isulad add end */ return c; diff --git a/src/lxc/lxccontainer.h b/src/lxc/lxccontainer.h index 5d23cc7e..77de7040 100644 --- a/src/lxc/lxccontainer.h +++ b/src/lxc/lxccontainer.h @@ -94,6 +94,12 @@ struct lxc_container { */ char *exit_fifo; + /*! isulad: + * \private + * start_timeout. + */ + unsigned int start_timeout; + /*! * \private * Container semaphore lock. @@ -880,6 +886,16 @@ struct lxc_container { * \return \c true on success, else \c false. */ bool (*set_container_info_file) (struct lxc_container *c, const char *info_file); + + /*! isulad add + * \brief An API call to set start timeout + * + * \param c Container. + * \param start_timeout Value of start timeout. + * + * \return \c true on success, else \c false. + */ + bool (*set_start_timeout)(struct lxc_container *c, unsigned int start_timeout); }; /*! diff --git a/src/lxc/start.c b/src/lxc/start.c index 63f5af88..f7be9e43 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c @@ -93,7 +93,22 @@ extern void mod_all_rdeps(struct lxc_container *c, bool inc); static bool do_destroy_container(struct lxc_handler *handler); static int lxc_rmdir_onedev_wrapper(void *data); static void lxc_destroy_container_on_signal(struct lxc_handler *handler, - const char *name); + const char *name); + +/* isulad: start timeout thread */ +typedef enum { + START_INIT, + START_TIMEOUT, + START_MAX, +} start_timeout_t; + +static start_timeout_t global_timeout_state = START_INIT; +static sem_t global_timeout_sem; + +struct start_timeout_conf { + unsigned int timeout; + int errfd; +}; static void print_top_failing_dir(const char *path) { @@ -1897,6 +1912,12 @@ static int lxc_spawn(struct lxc_handler *handler) goto out_delete_net; } + if (START_TIMEOUT == global_timeout_state) { + //lxc_write_error_message(conf->errpipe[1], "Starting the container \"%s\" timeout.", name); + ERROR("Starting the container \"%s\" timeout.", name); + goto out_delete_net; + } + /* Tell the child to complete its initialization and wait for it to exec * or return an error. (The child will never return * LXC_SYNC_READY_START+1. It will either close the sync pipe, causing @@ -1936,7 +1957,13 @@ static int lxc_spawn(struct lxc_handler *handler) ret = run_lxc_hooks(name, "oci-poststart", conf, oci_hook_args); if (ret < 0) { ERROR("Failed to run oci poststart hooks"); - goto out_delete_net; + goto out_abort; + } + + if (START_TIMEOUT == global_timeout_state) { + //lxc_write_error_message(conf->errpipe[1], "Starting the container \"%s\" timeout.", name); + ERROR("Starting the container \"%s\" timeout.", name); + goto out_abort; } ret = lxc_set_state(name, handler, RUNNING); @@ -1964,12 +1991,71 @@ out_abort: return -1; } +/* isulad: start timeout thread function */ +static void* wait_start_timeout(void *arg) +{ + struct start_timeout_conf *conf = (struct start_timeout_conf *)arg; + + sem_post(&global_timeout_sem); + + if (!conf || conf->timeout < 1) + goto out; + + sleep(conf->timeout); + + global_timeout_state = START_TIMEOUT; + +out: + free(conf); + return ((void *)0); +} + +/* isulad: create start timeout thread */ +static int create_start_timeout_thread(struct lxc_conf *conf, unsigned int start_timeout) +{ + int ret = 0; + pthread_t ptid; + pthread_attr_t attr; + struct start_timeout_conf *timeout_conf = NULL; + + if (sem_init(&global_timeout_sem, 0, 0)) { + ERROR("Failed to init start timeout semaphore");/*lint !e613*/ + ret = -1; + return ret; + } + + timeout_conf = malloc(sizeof(struct start_timeout_conf)); + if (!timeout_conf) { + ERROR("Failed to malloc start timeout conf"); + ret = -1; + goto out; + } + + memset(timeout_conf, 0, sizeof(struct start_timeout_conf)); + timeout_conf->errfd = conf->errpipe[1]; + timeout_conf->timeout = start_timeout; + + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + ret = pthread_create(&ptid, &attr, wait_start_timeout, timeout_conf); + if (ret != 0) { + ERROR("Create start wait timeout thread failed"); + free(timeout_conf); + goto out; + } + + sem_wait(&global_timeout_sem); +out: + sem_destroy(&global_timeout_sem); + return ret; +} + // isulad: send '128 + signal' if container is killed by signal. #define ExitSignalOffset 128 int __lxc_start(const char *name, struct lxc_handler *handler, struct lxc_operations* ops, void *data, const char *lxcpath, - bool daemonize, int *error_num) + bool daemonize, int *error_num, unsigned int start_timeout) { int ret, status, exit_code; struct lxc_conf *conf = handler->conf; @@ -1983,8 +2069,18 @@ int __lxc_start(const char *name, struct lxc_handler *handler, handler->data = data; handler->daemonize = daemonize; + /* isulad: add start timeout limit */ + if (start_timeout > 0) { + ret = create_start_timeout_thread(conf, start_timeout); + if (ret) { + ERROR("Failed to create start timeout thread for container \"%s\".", name); + goto out_fini_nonet; + } + } + if (!attach_block_device(handler->conf)) { ERROR("Failed to attach block device"); + ret = -1; goto out_fini_nonet; } @@ -2114,14 +2210,14 @@ static struct lxc_operations start_ops = { }; int lxc_start(const char *name, char *const argv[], struct lxc_handler *handler, - const char *lxcpath, bool daemonize, int *error_num) + const char *lxcpath, bool daemonize, int *error_num, unsigned int start_timeout) { struct start_args start_arg = { .argv = argv, }; TRACE("Doing lxc_start"); - return __lxc_start(name, handler, &start_ops, &start_arg, lxcpath, daemonize, error_num); + return __lxc_start(name, handler, &start_ops, &start_arg, lxcpath, daemonize, error_num, start_timeout); } static void lxc_destroy_container_on_signal(struct lxc_handler *handler, diff --git a/src/lxc/start.h b/src/lxc/start.h index f59bf549..a96f2aed 100644 --- a/src/lxc/start.h +++ b/src/lxc/start.h @@ -174,9 +174,9 @@ extern void lxc_fini(const char *name, struct lxc_handler *handler); */ extern int lxc_check_inherited(struct lxc_conf *conf, bool closeall, int *fds_to_ignore, size_t len_fds); -extern int __lxc_start(const char *, struct lxc_handler *, - struct lxc_operations *, void *, const char *, bool, - int *); +extern int __lxc_start(const char *name, struct lxc_handler *handler, + struct lxc_operations* ops, void *data, const char *lxcpath, + bool daemonize, int *error_num, unsigned int start_timeout); extern int resolve_clone_flags(struct lxc_handler *handler); diff --git a/src/lxc/tools/arguments.h b/src/lxc/tools/arguments.h index 047e9f16..afab9f54 100644 --- a/src/lxc/tools/arguments.h +++ b/src/lxc/tools/arguments.h @@ -65,6 +65,7 @@ struct lxc_arguments { char *terminal_fifos[2]; /* isulad add, fifos used to redirct stdin/out/err */ const char *container_info; /* isulad: file used to store pid and ppid info of container */ const char *exit_monitor_fifo; /* isulad: fifo used to monitor state of monitor process */ + unsigned int start_timeout; /* isulad: Seconds for waiting on a container to start before it is killed*/ /* for lxc-console */ unsigned int ttynum; @@ -180,6 +181,7 @@ struct lxc_arguments { #define OPT_OUTPUT_FIFO OPT_USAGE - 8 #define OPT_CONTAINER_INFO OPT_USAGE - 9 #define OPT_EXIT_FIFO OPT_USAGE - 10 +#define OPT_START_TIMEOUT OPT_USAGE - 11 /* isulad add end*/ extern int lxc_arguments_parse(struct lxc_arguments *args, int argc, diff --git a/src/lxc/tools/lxc_start.c b/src/lxc/tools/lxc_start.c index 60c7d70b..f37f8a6a 100644 --- a/src/lxc/tools/lxc_start.c +++ b/src/lxc/tools/lxc_start.c @@ -40,6 +40,7 @@ #include #include #include +#include #include @@ -74,6 +75,7 @@ static const struct option my_longopts[] = { {"out-fifo", required_argument, 0, OPT_OUTPUT_FIFO}, {"container-pidfile", required_argument, 0, OPT_CONTAINER_INFO}, {"exit-fifo", required_argument, 0, OPT_EXIT_FIFO}, + {"start-timeout", required_argument, 0, OPT_START_TIMEOUT}, /* isulad add end */ LXC_COMMON_OPTIONS }; @@ -108,6 +110,18 @@ Options :\n\ .pidfile = NULL, }; +static bool is_non_negative_num(const char *s) +{ + if (!s || !strcmp(s, "")) + return false; + while(*s != '\0') { + if(!isdigit(*s)) + return false; + ++s; + } + return true; +} + static int my_parser(struct lxc_arguments *args, int c, char *arg) { switch (c) { @@ -158,6 +172,13 @@ static int my_parser(struct lxc_arguments *args, int c, char *arg) case OPT_EXIT_FIFO: args->exit_monitor_fifo = arg; break; + case OPT_START_TIMEOUT: + if(!is_non_negative_num(arg)) { + fprintf(stderr, "Error start timeout parameter:%s.\n", arg); + return -1; + } + args->start_timeout = (unsigned int)atoi(arg); + break; } return 0; } @@ -341,6 +362,11 @@ int main(int argc, char *argv[]) c->exit_fifo = strdup(my_args.exit_monitor_fifo); } + /* isulad: add start timeout */ + if(my_args.start_timeout) { + c->set_start_timeout(c, my_args.start_timeout); + } + if (my_args.console) if (!c->set_config_item(c, "lxc.console.path", my_args.console)) goto out; -- 2.23.0