iSulad/0051-2303-use-a-timeout-epoll-loop-to-ensure-complete-dat.patch
zhongtao c36bc934aa !638 upgrade from upstream
* upgrade from upstream
2023-12-21 02:03:18 +00:00

198 lines
6.4 KiB
Diff

From 7d1b8d25468528a59318430d50d839032f2c1a07 Mon Sep 17 00:00:00 2001
From: zhongtao <zhongtao17@huawei.com>
Date: Tue, 12 Dec 2023 12:26:32 +0000
Subject: [PATCH 51/64] !2303 use a timeout epoll loop to ensure complete data
reception * use a timeout epoll loop to ensure complete data reception
---
src/cmd/isulad-shim/process.c | 105 +++++++++-------------------------
src/cmd/isulad-shim/process.h | 1 -
2 files changed, 26 insertions(+), 80 deletions(-)
diff --git a/src/cmd/isulad-shim/process.c b/src/cmd/isulad-shim/process.c
index e8cb9b32..97524f1a 100644
--- a/src/cmd/isulad-shim/process.c
+++ b/src/cmd/isulad-shim/process.c
@@ -169,6 +169,7 @@ static int get_exec_winsize(const char *buf, struct winsize *wsize)
static int sync_exit_cb(int fd, uint32_t events, void *cbdata, isula_epoll_descr_t *descr)
{
+ isula_epoll_remove_handler(descr, fd);
return EPOLL_LOOP_HANDLE_CLOSE;
}
@@ -364,23 +365,14 @@ static int stdout_cb(int fd, uint32_t events, void *cbdata, isula_epoll_descr_t
int r_count = 0;
int w_count = 0;
- if (events & EPOLLHUP) {
- return EPOLL_LOOP_HANDLE_CLOSE;
- }
-
- if (!(events & EPOLLIN)) {
- return EPOLL_LOOP_HANDLE_CONTINUE;
- }
-
(void)memset(p->buf, 0, DEFAULT_IO_COPY_BUF);
- if (p->block_read) {
- r_count = isula_file_read_nointr(fd, p->buf, DEFAULT_IO_COPY_BUF);
- } else {
- r_count = read(fd, p->buf, DEFAULT_IO_COPY_BUF);
- }
- if (r_count <= 0) {
- return EPOLL_LOOP_HANDLE_CLOSE;
+ r_count = isula_file_read_nointr(fd, p->buf, DEFAULT_IO_COPY_BUF);
+ if (r_count <= 0 ) {
+ isula_epoll_remove_handler(descr, fd);
+ // fd cannot be closed here, which will cause the container process to exit abnormally
+ // due to terminal fd receiving the sighup signal.
+ return EPOLL_LOOP_HANDLE_CONTINUE;
}
shim_write_container_log_file(p->terminal, STDID_OUT, p->buf, r_count);
@@ -419,23 +411,14 @@ static int stderr_cb(int fd, uint32_t events, void *cbdata, isula_epoll_descr_t
int r_count = 0;
int w_count = 0;
- if (events & EPOLLHUP) {
- return EPOLL_LOOP_HANDLE_CLOSE;
- }
-
- if (!(events & EPOLLIN)) {
- return EPOLL_LOOP_HANDLE_CONTINUE;
- }
-
(void)memset(p->buf, 0, DEFAULT_IO_COPY_BUF);
- if (p->block_read) {
- r_count = isula_file_read_nointr(fd, p->buf, DEFAULT_IO_COPY_BUF);
- } else {
- r_count = read(fd, p->buf, DEFAULT_IO_COPY_BUF);
- }
- if (r_count <= 0) {
- return EPOLL_LOOP_HANDLE_CLOSE;
+ r_count = isula_file_read_nointr(fd, p->buf, DEFAULT_IO_COPY_BUF);
+ if (r_count <= 0 ) {
+ isula_epoll_remove_handler(descr, fd);
+ // fd cannot be closed here, which will cause the container process to exit abnormally
+ // due to terminal fd receiving the sighup signal.
+ return EPOLL_LOOP_HANDLE_CONTINUE;
}
shim_write_container_log_file(p->terminal, STDID_ERR, p->buf, r_count);
@@ -474,18 +457,11 @@ static int resize_cb(int fd, uint32_t events, void *cbdata, isula_epoll_descr_t
int r_count = 0;
int resize_fd = -1;
- if (events & EPOLLHUP) {
- return EPOLL_LOOP_HANDLE_CLOSE;
- }
-
- if (!(events & EPOLLIN)) {
- return EPOLL_LOOP_HANDLE_CONTINUE;
- }
-
(void)memset(p->buf, 0, DEFAULT_IO_COPY_BUF);
r_count = isula_file_read_nointr(fd, p->buf, DEFAULT_IO_COPY_BUF);
if (r_count <= 0) {
- return EPOLL_LOOP_HANDLE_CLOSE;
+ close(fd);
+ return EPOLL_LOOP_HANDLE_CONTINUE;
}
resize_fd = p->recv_fd;
@@ -915,8 +891,6 @@ static int open_generic_io(process_t *p, isula_epoll_descr_t *descr)
static void *io_epoll_loop(void *data)
{
int ret = 0;
- int fd_out = -1;
- int fd_err = -1;
process_t *p = (process_t *)data;
isula_epoll_descr_t descr;
@@ -953,49 +927,23 @@ static void *io_epoll_loop(void *data)
(void)sem_post(&p->sem_mainloop);
+ // th frist epoll_loop will exit in the following scenarios:
+ // 1. Receive sync fd event
+ // 2. stdin fd receive EPOLLHUP event
+ // 3. stdin fd read failed
ret = isula_epoll_loop(&descr, -1);
if (ret != 0) {
write_message(ERR_MSG, "epoll loop failed");
exit(EXIT_FAILURE);
}
- // in order to avoid data loss, set fd non-block and read it
- p->block_read = false;
- if (p->state->terminal) {
- fd_out = p->recv_fd;
- } else {
- fd_out = p->shim_io->out;
- fd_err = p->shim_io->err;
- }
-
- if (fd_out > 0) {
- ret = isula_set_non_block(fd_out);
- if (ret != SHIM_OK) {
- write_message(ERR_MSG, "set fd %d non_block failed:%d", fd_out, SHIM_SYS_ERR(errno));
- exit(EXIT_FAILURE);
- }
-
- for (;;) {
- ret = stdout_cb(fd_out, EPOLLIN, p, &descr);
- if (ret == EPOLL_LOOP_HANDLE_CLOSE) {
- break;
- }
- }
- }
-
- if (fd_err > 0) {
- ret = isula_set_non_block(fd_err);
- if (ret != SHIM_OK) {
- write_message(ERR_MSG, "set fd %d non_block failed:%d", fd_err, SHIM_SYS_ERR(errno));
- exit(EXIT_FAILURE);
- }
-
- for (;;) {
- ret = stderr_cb(fd_err, EPOLLIN, p, &descr);
- if (ret == EPOLL_LOOP_HANDLE_CLOSE) {
- break;
- }
- }
+ // use a timeout epoll loop to ensure complete data reception
+ // th second epoll_loop will exit in the following scenarios:
+ // 1. both stdout fd and stderr fd failed to read
+ // 2. no event received within 100 milliseconds
+ ret = isula_epoll_loop(&descr, 100);
+ if (ret != 0) {
+ write_message(ERR_MSG, "Repeat the epoll loop to ensure that all data is transferred");
}
return NULL;
@@ -1220,7 +1168,6 @@ process_t *new_process(char *id, char *bundle, char *runtime)
p->bundle = bundle;
p->runtime = runtime;
p->state = p_state;
- p->block_read = true;
p->console_sock_path = NULL;
p->exit_fd = -1;
p->io_loop_fd = -1;
diff --git a/src/cmd/isulad-shim/process.h b/src/cmd/isulad-shim/process.h
index 5607316c..32ba7366 100644
--- a/src/cmd/isulad-shim/process.h
+++ b/src/cmd/isulad-shim/process.h
@@ -55,7 +55,6 @@ typedef struct process {
int sync_fd;
int listen_fd;
int recv_fd;
- bool block_read;
log_terminal *terminal;
stdio_t *stdio; // shim to on runtime side, in:r out/err: w
stdio_t *shim_io; // shim io on isulad side, in: w out/err: r
--
2.42.0