From 94f2e0c57d2668666f868cb32984e7833be8f296 Mon Sep 17 00:00:00 2001 From: Lemmy Huang Date: Wed, 15 Mar 2023 11:17:23 +0800 Subject: [PATCH 2/2] refactor kernel_wait of epoll Signed-off-by: Lemmy Huang --- src/common/gazelle_dfx_msg.h | 5 +- src/common/gazelle_opt.h | 2 + src/lstack/api/lstack_epoll.c | 42 ++++------- src/lstack/core/dir.mk | 3 +- src/lstack/core/lstack_kernel_wait.c | 86 ++++++++++++++++++++++ src/lstack/core/lstack_protocol_stack.c | 51 +------------ src/lstack/include/lstack_kernel_wait.h | 35 +++++++++ src/lstack/include/lstack_protocol_stack.h | 6 +- 8 files changed, 147 insertions(+), 83 deletions(-) create mode 100644 src/lstack/core/lstack_kernel_wait.c create mode 100644 src/lstack/include/lstack_kernel_wait.h diff --git a/src/common/gazelle_dfx_msg.h b/src/common/gazelle_dfx_msg.h index 674f2d7..7f75d12 100644 --- a/src/common/gazelle_dfx_msg.h +++ b/src/common/gazelle_dfx_msg.h @@ -17,12 +17,11 @@ #include #include +#include "gazelle_opt.h" + #define GAZELLE_CLIENT_NUM_MIN 1 #define GAZELLE_LOG_LEVEL_MAX 10 -/* maybe it should be consistent with MEMP_NUM_TCP_PCB */ -#define GAZELLE_LSTACK_MAX_CONN (20000 + 2000) // same as MAX_CLIENTS + RESERVED_CLIENTS in lwipopts.h - enum GAZELLE_STAT_MODE { GAZELLE_STAT_LTRAN_SHOW = 0, GAZELLE_STAT_LTRAN_SHOW_RATE, diff --git a/src/common/gazelle_opt.h b/src/common/gazelle_opt.h index 76b89ce..fb903af 100644 --- a/src/common/gazelle_opt.h +++ b/src/common/gazelle_opt.h @@ -25,6 +25,8 @@ #define PROTOCOL_STACK_MAX 32 #define KERNEL_EPOLL_MAX 512 +/* it should be consistent with MEMP_NUM_TCP_PCB in lwipopts.h */ +#define GAZELLE_LSTACK_MAX_CONN (20000 + 2000) #define ETHER_ADDR_LEN 6 diff --git a/src/lstack/api/lstack_epoll.c b/src/lstack/api/lstack_epoll.c index da29590..39cc3de 100644 --- a/src/lstack/api/lstack_epoll.c +++ b/src/lstack/api/lstack_epoll.c @@ -35,6 +35,7 @@ #include "lstack_lwip.h" #include "lstack_protocol_stack.h" #include "posix/lstack_epoll.h" +#include "lstack_kernel_wait.h" #define EPOLL_KERNEL_INTERVAL 10 /* ms */ #define SEC_TO_NSEC 1000000000 @@ -43,8 +44,6 @@ #define POLL_KERNEL_EVENTS 32 static void update_epoll_max_stack(struct wakeup_poll *wakeup); -static void change_epollfd_kernel_thread(struct wakeup_poll *wakeup, struct protocol_stack *old_stack, - struct protocol_stack *new_stack); void add_sock_event(struct lwip_sock *sock, uint32_t event) { @@ -167,6 +166,11 @@ static void raise_pending_events(struct wakeup_poll *wakeup, struct lwip_sock *s } } +static struct kernel_wait *get_stack_knwait(struct protocol_stack *stack) +{ + return stack == NULL ? NULL : &stack->knwait; +} + int32_t lstack_do_epoll_create(int32_t fd) { if (fd < 0) { @@ -213,7 +217,8 @@ int32_t lstack_do_epoll_create(int32_t fd) sock->wakeup = wakeup; update_epoll_max_stack(wakeup); - change_epollfd_kernel_thread(wakeup, wakeup->bind_stack, wakeup->max_stack); + kernel_wait_change_thread(wakeup, get_stack_knwait(wakeup->bind_stack), + get_stack_knwait(wakeup->max_stack)); wakeup->bind_stack = wakeup->max_stack; if (get_global_cfg_params()->app_bind_numa) { bind_to_stack_numa(wakeup->bind_stack); @@ -431,29 +436,12 @@ static int32_t poll_lwip_event(struct pollfd *fds, nfds_t nfds) return event_num; } -static void change_epollfd_kernel_thread(struct wakeup_poll *wakeup, struct protocol_stack *old_stack, - struct protocol_stack *new_stack) -{ - if (old_stack) { - if (posix_api->epoll_ctl_fn(old_stack->epollfd, EPOLL_CTL_DEL, wakeup->epollfd, NULL) != 0) { - LSTACK_LOG(ERR, LSTACK, "epoll_ctl_fn errno=%d\n", errno); - } - } - - /* avoid kernel thread post too much, use EPOLLET */ - struct epoll_event event; - event.data.ptr = wakeup; - event.events = EPOLLIN | EPOLLOUT | EPOLLERR | EPOLLHUP | EPOLLET; - if (posix_api->epoll_ctl_fn(new_stack->epollfd, EPOLL_CTL_ADD, wakeup->epollfd, &event) != 0) { - LSTACK_LOG(ERR, LSTACK, "epoll_ctl_fn errno=%d\n", errno); - } -} - static void epoll_bind_statck(struct wakeup_poll *wakeup) { if (wakeup->bind_stack != wakeup->max_stack && wakeup->max_stack) { bind_to_stack_numa(wakeup->max_stack); - change_epollfd_kernel_thread(wakeup, wakeup->bind_stack, wakeup->max_stack); + kernel_wait_change_thread(wakeup, get_stack_knwait(wakeup->bind_stack), + get_stack_knwait(wakeup->max_stack)); wakeup->bind_stack = wakeup->max_stack; } } @@ -555,7 +543,8 @@ static int32_t init_poll_wakeup_data(struct wakeup_poll *wakeup) int32_t stack_count[PROTOCOL_STACK_MAX] = {0}; uint16_t bind_id = find_max_cnt_stack(stack_count, stack_group->stack_num, wakeup->bind_stack); - change_epollfd_kernel_thread(wakeup, wakeup->bind_stack, stack_group->stacks[bind_id]); + kernel_wait_change_thread(wakeup, get_stack_knwait(wakeup->bind_stack), + get_stack_knwait(stack_group->stacks[bind_id])); wakeup->bind_stack = stack_group->stacks[bind_id]; if (get_global_cfg_params()->app_bind_numa) { bind_to_stack_numa(wakeup->bind_stack); @@ -585,7 +574,7 @@ static void resize_kernel_poll(struct wakeup_poll *wakeup, nfds_t nfds) wakeup->last_max_nfds = nfds; } -static void poll_bind_statck(struct wakeup_poll *wakeup, int32_t *stack_count) +static void poll_bind_stack(struct wakeup_poll *wakeup, int32_t *stack_count) { struct protocol_stack_group *stack_group = get_protocol_stack_group(); @@ -594,7 +583,8 @@ static void poll_bind_statck(struct wakeup_poll *wakeup, int32_t *stack_count) return; } - change_epollfd_kernel_thread(wakeup, wakeup->bind_stack, stack_group->stacks[bind_id]); + kernel_wait_change_thread(wakeup, get_stack_knwait(wakeup->bind_stack), + get_stack_knwait(stack_group->stacks[bind_id])); bind_to_stack_numa(stack_group->stacks[bind_id]); wakeup->bind_stack = stack_group->stacks[bind_id]; } @@ -663,7 +653,7 @@ static void poll_init(struct wakeup_poll *wakeup, struct pollfd *fds, nfds_t nfd wakeup->last_nfds = nfds; if (get_global_cfg_params()->app_bind_numa) { - poll_bind_statck(wakeup, stack_count); + poll_bind_stack(wakeup, stack_count); } } diff --git a/src/lstack/core/dir.mk b/src/lstack/core/dir.mk index 88c1e08..0908f73 100644 --- a/src/lstack/core/dir.mk +++ b/src/lstack/core/dir.mk @@ -8,6 +8,7 @@ # PURPOSE. # See the Mulan PSL v2 for more details. -SRC = lstack_init.c lstack_cfg.c lstack_dpdk.c lstack_control_plane.c lstack_stack_stat.c lstack_lwip.c lstack_protocol_stack.c lstack_thread_rpc.c +SRC = lstack_init.c lstack_cfg.c lstack_dpdk.c lstack_control_plane.c lstack_stack_stat.c lstack_lwip.c \ + lstack_protocol_stack.c lstack_thread_rpc.c lstack_kernel_wait.c $(eval $(call register_dir, core, $(SRC))) diff --git a/src/lstack/core/lstack_kernel_wait.c b/src/lstack/core/lstack_kernel_wait.c new file mode 100644 index 0000000..9da4f59 --- /dev/null +++ b/src/lstack/core/lstack_kernel_wait.c @@ -0,0 +1,86 @@ +/* +* Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. +* gazelle is licensed under the Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +* PURPOSE. +* See the Mulan PSL v2 for more details. +*/ + +#include +#include + +#include "lstack_log.h" +#include "lstack_kernel_wait.h" +#include "lstack_protocol_stack.h" +#include "posix/lstack_epoll.h" + +#define KERNEL_EVENT_100us 100 + +void kernel_wait_change_thread(struct wakeup_poll *wakeup, struct kernel_wait *old_knwait, + struct kernel_wait *new_knwait) +{ + if (old_knwait) { + if (posix_api->epoll_ctl_fn(old_knwait->epollfd, EPOLL_CTL_DEL, wakeup->epollfd, NULL) != 0) { + LSTACK_LOG(ERR, LSTACK, "epoll_ctl_fn errno=%d\n", errno); + } + } + + /* avoid kernel thread post too much, use EPOLLET */ + struct epoll_event event; + event.data.ptr = wakeup; + event.events = EPOLLIN | EPOLLOUT | EPOLLERR | EPOLLHUP | EPOLLET; + if (posix_api->epoll_ctl_fn(new_knwait->epollfd, EPOLL_CTL_ADD, wakeup->epollfd, &event) != 0) { + LSTACK_LOG(ERR, LSTACK, "epoll_ctl_fn errno=%d\n", errno); + } +} + +void wakeup_kernel_wait(struct kernel_wait *knwait, struct list_node *wakeup_list, uint16_t queue_id) +{ + if (knwait->kernel_event_num == 0) { + return; + } + + for (int32_t i = 0; i < knwait->kernel_event_num; i++) { + struct wakeup_poll *wakeup = knwait->kernel_events[i].data.ptr; + if (wakeup->type == WAKEUP_CLOSE) { + continue; + } + + __atomic_store_n(&wakeup->have_kernel_event, true, __ATOMIC_RELEASE); + if (list_is_null(&wakeup->wakeup_list[queue_id])) { + list_add_node(wakeup_list, &wakeup->wakeup_list[queue_id]); + } + } + + knwait->kernel_event_num = 0; +} + +void* kernel_wait_thread(void *arg) +{ + uint16_t queue_id = *(uint16_t *)arg; + struct protocol_stack *stack = get_protocol_stack_group()->stacks[queue_id]; + struct kernel_wait *knwait = &stack->knwait; + + bind_to_stack_numa(stack); + + knwait->epollfd = posix_api->epoll_create_fn(GAZELLE_LSTACK_MAX_CONN); + if (knwait->epollfd < 0) { + LSTACK_LOG(ERR, LSTACK, "epoll_create_fn errno=%d\n", errno); + return NULL; + } + + LSTACK_LOG(INFO, LSTACK, "kernelevent_%02hu start\n", queue_id); + + for (;;) { + knwait->kernel_event_num = posix_api->epoll_wait_fn(knwait->epollfd, knwait->kernel_events, KERNEL_EPOLL_MAX, -1); + while (knwait->kernel_event_num > 0) { + usleep(KERNEL_EVENT_100us); + } + } + + return NULL; +} \ No newline at end of file diff --git a/src/lstack/core/lstack_protocol_stack.c b/src/lstack/core/lstack_protocol_stack.c index 93204d1..d78eec2 100644 --- a/src/lstack/core/lstack_protocol_stack.c +++ b/src/lstack/core/lstack_protocol_stack.c @@ -36,8 +36,6 @@ #include "posix/lstack_epoll.h" #include "lstack_stack_stat.h" -#define KERNEL_EVENT_100us 100 - static PER_THREAD struct protocol_stack *g_stack_p = NULL; static struct protocol_stack_group g_stack_group = {0}; @@ -250,25 +248,6 @@ static void* gazelle_wakeup_thread(void *arg) return NULL; } -static void* gazelle_kernelevent_thread(void *arg) -{ - uint16_t queue_id = *(uint16_t *)arg; - struct protocol_stack *stack = get_protocol_stack_group()->stacks[queue_id]; - - bind_to_stack_numa(stack); - - LSTACK_LOG(INFO, LSTACK, "kernelevent_%02hu start\n", queue_id); - - for (;;) { - stack->kernel_event_num = posix_api->epoll_wait_fn(stack->epollfd, stack->kernel_events, KERNEL_EPOLL_MAX, -1); - while (stack->kernel_event_num > 0) { - usleep(KERNEL_EVENT_100us); - } - } - - return NULL; -} - static int32_t init_stack_value(struct protocol_stack *stack, uint16_t queue_id) { struct protocol_stack_group *stack_group = get_protocol_stack_group(); @@ -288,11 +267,6 @@ static int32_t init_stack_value(struct protocol_stack *stack, uint16_t queue_id) stack_group->stacks[queue_id] = stack; set_stack_idx(queue_id); - stack->epollfd = posix_api->epoll_create_fn(GAZELLE_LSTACK_MAX_CONN); - if (stack->epollfd < 0) { - return -1; - } - stack->socket_id = numa_node_of_cpu(stack->cpu_id); if (stack->socket_id < 0) { LSTACK_LOG(ERR, LSTACK, "numa_node_of_cpu failed\n"); @@ -327,7 +301,7 @@ static int32_t create_affiliate_thread(uint16_t queue_id, bool wakeup_enable) } } - if (create_thread(queue_id, "gazellekernel", gazelle_kernelevent_thread) != 0) { + if (create_thread(queue_id, "gazellekernel", kernel_wait_thread) != 0) { LSTACK_LOG(ERR, LSTACK, "gazellekernel errno=%d\n", errno); return -1; } @@ -390,27 +364,6 @@ END: return NULL; } -static void wakeup_kernel_event(struct protocol_stack *stack) -{ - if (stack->kernel_event_num == 0) { - return; - } - - for (int32_t i = 0; i < stack->kernel_event_num; i++) { - struct wakeup_poll *wakeup = stack->kernel_events[i].data.ptr; - if (wakeup->type == WAKEUP_CLOSE) { - continue; - } - - __atomic_store_n(&wakeup->have_kernel_event, true, __ATOMIC_RELEASE); - if (list_is_null(&wakeup->wakeup_list[stack->queue_id])) { - list_add_node(&stack->wakeup_list, &wakeup->wakeup_list[stack->queue_id]); - } - } - - stack->kernel_event_num = 0; -} - void stack_send_pkts(struct protocol_stack *stack) { uint32_t send_num = stack->send_end - stack->send_start; @@ -492,7 +445,7 @@ static void* gazelle_stack_thread(void *arg) read_recv_list(stack, read_connect_number); if ((wakeup_tick & 0xf) == 0) { - wakeup_kernel_event(stack); + wakeup_kernel_wait(&stack->knwait, &stack->wakeup_list, stack->queue_id); wakeup_stack_epoll(stack, wakeup_thread_enable); } diff --git a/src/lstack/include/lstack_kernel_wait.h b/src/lstack/include/lstack_kernel_wait.h new file mode 100644 index 0000000..76bb823 --- /dev/null +++ b/src/lstack/include/lstack_kernel_wait.h @@ -0,0 +1,35 @@ +/* +* Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. +* gazelle is licensed under the Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +* PURPOSE. +* See the Mulan PSL v2 for more details. +*/ + +#ifndef _GAZELLE_KERNEL_WAIT_H_ +#define _GAZELLE_KERNEL_WAIT_H_ + +#include +#include + +#include "gazelle_opt.h" + +struct wakeup_poll; +struct kernel_wait { + /* kernel event thread epoll fd */ + int32_t epollfd; + /* kernel event thread read/write frequently */ + int32_t kernel_event_num; + struct epoll_event kernel_events[KERNEL_EPOLL_MAX]; +}; + +void kernel_wait_change_thread(struct wakeup_poll *wakeup, struct kernel_wait *old_knwait, + struct kernel_wait *new_knwait); +void wakeup_kernel_wait(struct kernel_wait *knwait, struct list_node *wakeup_list, uint16_t queue_id); +void* kernel_wait_thread(void *arg); + +#endif /* _GAZELLE_KERNEL_WAIT_H_ */ diff --git a/src/lstack/include/lstack_protocol_stack.h b/src/lstack/include/lstack_protocol_stack.h index b093362..0e79c93 100644 --- a/src/lstack/include/lstack_protocol_stack.h +++ b/src/lstack/include/lstack_protocol_stack.h @@ -24,6 +24,7 @@ #include "lstack_lockless_queue.h" #include "lstack_ethdev.h" #include "gazelle_opt.h" +#include "lstack_kernel_wait.h" #define SOCK_RECV_RING_SIZE (128) #define SOCK_RECV_FREE_THRES (32) @@ -50,7 +51,6 @@ struct protocol_stack { uint16_t socket_id; uint16_t cpu_id; cpu_set_t idle_cpuset; /* idle cpu in numa of stack, app thread bind to it */ - int32_t epollfd; /* kernel event thread epoll fd */ struct rte_mempool *rxtx_pktmbuf_pool; struct rte_ring *rx_ring; @@ -64,9 +64,7 @@ struct protocol_stack { lockless_queue rpc_queue __rte_cache_aligned; char pad __rte_cache_aligned; - /* kernel event thread read/write frequently */ - struct epoll_event kernel_events[KERNEL_EPOLL_MAX]; - int32_t kernel_event_num; + struct kernel_wait knwait; char pad1 __rte_cache_aligned; struct netif netif; -- 2.33.0