diff --git a/0198-refactor-kernel_wait-of-epoll.patch b/0198-refactor-kernel_wait-of-epoll.patch new file mode 100644 index 0000000..eea7ed6 --- /dev/null +++ b/0198-refactor-kernel_wait-of-epoll.patch @@ -0,0 +1,442 @@ +From 94f2e0c57d2668666f868cb32984e7833be8f296 Mon Sep 17 00:00:00 2001 +From: Lemmy Huang +Date: Wed, 15 Mar 2023 11:17:23 +0800 +Subject: [PATCH 2/2] refactor kernel_wait of epoll + +Signed-off-by: Lemmy Huang +--- + src/common/gazelle_dfx_msg.h | 5 +- + src/common/gazelle_opt.h | 2 + + src/lstack/api/lstack_epoll.c | 42 ++++------- + src/lstack/core/dir.mk | 3 +- + src/lstack/core/lstack_kernel_wait.c | 86 ++++++++++++++++++++++ + src/lstack/core/lstack_protocol_stack.c | 51 +------------ + src/lstack/include/lstack_kernel_wait.h | 35 +++++++++ + src/lstack/include/lstack_protocol_stack.h | 6 +- + 8 files changed, 147 insertions(+), 83 deletions(-) + create mode 100644 src/lstack/core/lstack_kernel_wait.c + create mode 100644 src/lstack/include/lstack_kernel_wait.h + +diff --git a/src/common/gazelle_dfx_msg.h b/src/common/gazelle_dfx_msg.h +index 674f2d7..7f75d12 100644 +--- a/src/common/gazelle_dfx_msg.h ++++ b/src/common/gazelle_dfx_msg.h +@@ -17,12 +17,11 @@ + #include + #include + ++#include "gazelle_opt.h" ++ + #define GAZELLE_CLIENT_NUM_MIN 1 + #define GAZELLE_LOG_LEVEL_MAX 10 + +-/* maybe it should be consistent with MEMP_NUM_TCP_PCB */ +-#define GAZELLE_LSTACK_MAX_CONN (20000 + 2000) // same as MAX_CLIENTS + RESERVED_CLIENTS in lwipopts.h +- + enum GAZELLE_STAT_MODE { + GAZELLE_STAT_LTRAN_SHOW = 0, + GAZELLE_STAT_LTRAN_SHOW_RATE, +diff --git a/src/common/gazelle_opt.h b/src/common/gazelle_opt.h +index 76b89ce..fb903af 100644 +--- a/src/common/gazelle_opt.h ++++ b/src/common/gazelle_opt.h +@@ -25,6 +25,8 @@ + + #define PROTOCOL_STACK_MAX 32 + #define KERNEL_EPOLL_MAX 512 ++/* it should be consistent with MEMP_NUM_TCP_PCB in lwipopts.h */ ++#define GAZELLE_LSTACK_MAX_CONN (20000 + 2000) + + #define ETHER_ADDR_LEN 6 + +diff --git a/src/lstack/api/lstack_epoll.c b/src/lstack/api/lstack_epoll.c +index da29590..39cc3de 100644 +--- a/src/lstack/api/lstack_epoll.c ++++ b/src/lstack/api/lstack_epoll.c +@@ -35,6 +35,7 @@ + #include "lstack_lwip.h" + #include "lstack_protocol_stack.h" + #include "posix/lstack_epoll.h" ++#include "lstack_kernel_wait.h" + + #define EPOLL_KERNEL_INTERVAL 10 /* ms */ + #define SEC_TO_NSEC 1000000000 +@@ -43,8 +44,6 @@ + #define POLL_KERNEL_EVENTS 32 + + static void update_epoll_max_stack(struct wakeup_poll *wakeup); +-static void change_epollfd_kernel_thread(struct wakeup_poll *wakeup, struct protocol_stack *old_stack, +- struct protocol_stack *new_stack); + + void add_sock_event(struct lwip_sock *sock, uint32_t event) + { +@@ -167,6 +166,11 @@ static void raise_pending_events(struct wakeup_poll *wakeup, struct lwip_sock *s + } + } + ++static struct kernel_wait *get_stack_knwait(struct protocol_stack *stack) ++{ ++ return stack == NULL ? NULL : &stack->knwait; ++} ++ + int32_t lstack_do_epoll_create(int32_t fd) + { + if (fd < 0) { +@@ -213,7 +217,8 @@ int32_t lstack_do_epoll_create(int32_t fd) + sock->wakeup = wakeup; + + update_epoll_max_stack(wakeup); +- change_epollfd_kernel_thread(wakeup, wakeup->bind_stack, wakeup->max_stack); ++ kernel_wait_change_thread(wakeup, get_stack_knwait(wakeup->bind_stack), ++ get_stack_knwait(wakeup->max_stack)); + wakeup->bind_stack = wakeup->max_stack; + if (get_global_cfg_params()->app_bind_numa) { + bind_to_stack_numa(wakeup->bind_stack); +@@ -431,29 +436,12 @@ static int32_t poll_lwip_event(struct pollfd *fds, nfds_t nfds) + return event_num; + } + +-static void change_epollfd_kernel_thread(struct wakeup_poll *wakeup, struct protocol_stack *old_stack, +- struct protocol_stack *new_stack) +-{ +- if (old_stack) { +- if (posix_api->epoll_ctl_fn(old_stack->epollfd, EPOLL_CTL_DEL, wakeup->epollfd, NULL) != 0) { +- LSTACK_LOG(ERR, LSTACK, "epoll_ctl_fn errno=%d\n", errno); +- } +- } +- +- /* avoid kernel thread post too much, use EPOLLET */ +- struct epoll_event event; +- event.data.ptr = wakeup; +- event.events = EPOLLIN | EPOLLOUT | EPOLLERR | EPOLLHUP | EPOLLET; +- if (posix_api->epoll_ctl_fn(new_stack->epollfd, EPOLL_CTL_ADD, wakeup->epollfd, &event) != 0) { +- LSTACK_LOG(ERR, LSTACK, "epoll_ctl_fn errno=%d\n", errno); +- } +-} +- + static void epoll_bind_statck(struct wakeup_poll *wakeup) + { + if (wakeup->bind_stack != wakeup->max_stack && wakeup->max_stack) { + bind_to_stack_numa(wakeup->max_stack); +- change_epollfd_kernel_thread(wakeup, wakeup->bind_stack, wakeup->max_stack); ++ kernel_wait_change_thread(wakeup, get_stack_knwait(wakeup->bind_stack), ++ get_stack_knwait(wakeup->max_stack)); + wakeup->bind_stack = wakeup->max_stack; + } + } +@@ -555,7 +543,8 @@ static int32_t init_poll_wakeup_data(struct wakeup_poll *wakeup) + + int32_t stack_count[PROTOCOL_STACK_MAX] = {0}; + uint16_t bind_id = find_max_cnt_stack(stack_count, stack_group->stack_num, wakeup->bind_stack); +- change_epollfd_kernel_thread(wakeup, wakeup->bind_stack, stack_group->stacks[bind_id]); ++ kernel_wait_change_thread(wakeup, get_stack_knwait(wakeup->bind_stack), ++ get_stack_knwait(stack_group->stacks[bind_id])); + wakeup->bind_stack = stack_group->stacks[bind_id]; + if (get_global_cfg_params()->app_bind_numa) { + bind_to_stack_numa(wakeup->bind_stack); +@@ -585,7 +574,7 @@ static void resize_kernel_poll(struct wakeup_poll *wakeup, nfds_t nfds) + wakeup->last_max_nfds = nfds; + } + +-static void poll_bind_statck(struct wakeup_poll *wakeup, int32_t *stack_count) ++static void poll_bind_stack(struct wakeup_poll *wakeup, int32_t *stack_count) + { + struct protocol_stack_group *stack_group = get_protocol_stack_group(); + +@@ -594,7 +583,8 @@ static void poll_bind_statck(struct wakeup_poll *wakeup, int32_t *stack_count) + return; + } + +- change_epollfd_kernel_thread(wakeup, wakeup->bind_stack, stack_group->stacks[bind_id]); ++ kernel_wait_change_thread(wakeup, get_stack_knwait(wakeup->bind_stack), ++ get_stack_knwait(stack_group->stacks[bind_id])); + bind_to_stack_numa(stack_group->stacks[bind_id]); + wakeup->bind_stack = stack_group->stacks[bind_id]; + } +@@ -663,7 +653,7 @@ static void poll_init(struct wakeup_poll *wakeup, struct pollfd *fds, nfds_t nfd + wakeup->last_nfds = nfds; + + if (get_global_cfg_params()->app_bind_numa) { +- poll_bind_statck(wakeup, stack_count); ++ poll_bind_stack(wakeup, stack_count); + } + } + +diff --git a/src/lstack/core/dir.mk b/src/lstack/core/dir.mk +index 88c1e08..0908f73 100644 +--- a/src/lstack/core/dir.mk ++++ b/src/lstack/core/dir.mk +@@ -8,6 +8,7 @@ + # PURPOSE. + # See the Mulan PSL v2 for more details. + +-SRC = lstack_init.c lstack_cfg.c lstack_dpdk.c lstack_control_plane.c lstack_stack_stat.c lstack_lwip.c lstack_protocol_stack.c lstack_thread_rpc.c ++SRC = lstack_init.c lstack_cfg.c lstack_dpdk.c lstack_control_plane.c lstack_stack_stat.c lstack_lwip.c \ ++ lstack_protocol_stack.c lstack_thread_rpc.c lstack_kernel_wait.c + $(eval $(call register_dir, core, $(SRC))) + +diff --git a/src/lstack/core/lstack_kernel_wait.c b/src/lstack/core/lstack_kernel_wait.c +new file mode 100644 +index 0000000..9da4f59 +--- /dev/null ++++ b/src/lstack/core/lstack_kernel_wait.c +@@ -0,0 +1,86 @@ ++/* ++* Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. ++* gazelle is licensed under the Mulan PSL v2. ++* You can use this software according to the terms and conditions of the Mulan PSL v2. ++* You may obtain a copy of Mulan PSL v2 at: ++* http://license.coscl.org.cn/MulanPSL2 ++* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR ++* IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR ++* PURPOSE. ++* See the Mulan PSL v2 for more details. ++*/ ++ ++#include ++#include ++ ++#include "lstack_log.h" ++#include "lstack_kernel_wait.h" ++#include "lstack_protocol_stack.h" ++#include "posix/lstack_epoll.h" ++ ++#define KERNEL_EVENT_100us 100 ++ ++void kernel_wait_change_thread(struct wakeup_poll *wakeup, struct kernel_wait *old_knwait, ++ struct kernel_wait *new_knwait) ++{ ++ if (old_knwait) { ++ if (posix_api->epoll_ctl_fn(old_knwait->epollfd, EPOLL_CTL_DEL, wakeup->epollfd, NULL) != 0) { ++ LSTACK_LOG(ERR, LSTACK, "epoll_ctl_fn errno=%d\n", errno); ++ } ++ } ++ ++ /* avoid kernel thread post too much, use EPOLLET */ ++ struct epoll_event event; ++ event.data.ptr = wakeup; ++ event.events = EPOLLIN | EPOLLOUT | EPOLLERR | EPOLLHUP | EPOLLET; ++ if (posix_api->epoll_ctl_fn(new_knwait->epollfd, EPOLL_CTL_ADD, wakeup->epollfd, &event) != 0) { ++ LSTACK_LOG(ERR, LSTACK, "epoll_ctl_fn errno=%d\n", errno); ++ } ++} ++ ++void wakeup_kernel_wait(struct kernel_wait *knwait, struct list_node *wakeup_list, uint16_t queue_id) ++{ ++ if (knwait->kernel_event_num == 0) { ++ return; ++ } ++ ++ for (int32_t i = 0; i < knwait->kernel_event_num; i++) { ++ struct wakeup_poll *wakeup = knwait->kernel_events[i].data.ptr; ++ if (wakeup->type == WAKEUP_CLOSE) { ++ continue; ++ } ++ ++ __atomic_store_n(&wakeup->have_kernel_event, true, __ATOMIC_RELEASE); ++ if (list_is_null(&wakeup->wakeup_list[queue_id])) { ++ list_add_node(wakeup_list, &wakeup->wakeup_list[queue_id]); ++ } ++ } ++ ++ knwait->kernel_event_num = 0; ++} ++ ++void* kernel_wait_thread(void *arg) ++{ ++ uint16_t queue_id = *(uint16_t *)arg; ++ struct protocol_stack *stack = get_protocol_stack_group()->stacks[queue_id]; ++ struct kernel_wait *knwait = &stack->knwait; ++ ++ bind_to_stack_numa(stack); ++ ++ knwait->epollfd = posix_api->epoll_create_fn(GAZELLE_LSTACK_MAX_CONN); ++ if (knwait->epollfd < 0) { ++ LSTACK_LOG(ERR, LSTACK, "epoll_create_fn errno=%d\n", errno); ++ return NULL; ++ } ++ ++ LSTACK_LOG(INFO, LSTACK, "kernelevent_%02hu start\n", queue_id); ++ ++ for (;;) { ++ knwait->kernel_event_num = posix_api->epoll_wait_fn(knwait->epollfd, knwait->kernel_events, KERNEL_EPOLL_MAX, -1); ++ while (knwait->kernel_event_num > 0) { ++ usleep(KERNEL_EVENT_100us); ++ } ++ } ++ ++ return NULL; ++} +\ No newline at end of file +diff --git a/src/lstack/core/lstack_protocol_stack.c b/src/lstack/core/lstack_protocol_stack.c +index 93204d1..d78eec2 100644 +--- a/src/lstack/core/lstack_protocol_stack.c ++++ b/src/lstack/core/lstack_protocol_stack.c +@@ -36,8 +36,6 @@ + #include "posix/lstack_epoll.h" + #include "lstack_stack_stat.h" + +-#define KERNEL_EVENT_100us 100 +- + static PER_THREAD struct protocol_stack *g_stack_p = NULL; + static struct protocol_stack_group g_stack_group = {0}; + +@@ -250,25 +248,6 @@ static void* gazelle_wakeup_thread(void *arg) + return NULL; + } + +-static void* gazelle_kernelevent_thread(void *arg) +-{ +- uint16_t queue_id = *(uint16_t *)arg; +- struct protocol_stack *stack = get_protocol_stack_group()->stacks[queue_id]; +- +- bind_to_stack_numa(stack); +- +- LSTACK_LOG(INFO, LSTACK, "kernelevent_%02hu start\n", queue_id); +- +- for (;;) { +- stack->kernel_event_num = posix_api->epoll_wait_fn(stack->epollfd, stack->kernel_events, KERNEL_EPOLL_MAX, -1); +- while (stack->kernel_event_num > 0) { +- usleep(KERNEL_EVENT_100us); +- } +- } +- +- return NULL; +-} +- + static int32_t init_stack_value(struct protocol_stack *stack, uint16_t queue_id) + { + struct protocol_stack_group *stack_group = get_protocol_stack_group(); +@@ -288,11 +267,6 @@ static int32_t init_stack_value(struct protocol_stack *stack, uint16_t queue_id) + stack_group->stacks[queue_id] = stack; + set_stack_idx(queue_id); + +- stack->epollfd = posix_api->epoll_create_fn(GAZELLE_LSTACK_MAX_CONN); +- if (stack->epollfd < 0) { +- return -1; +- } +- + stack->socket_id = numa_node_of_cpu(stack->cpu_id); + if (stack->socket_id < 0) { + LSTACK_LOG(ERR, LSTACK, "numa_node_of_cpu failed\n"); +@@ -327,7 +301,7 @@ static int32_t create_affiliate_thread(uint16_t queue_id, bool wakeup_enable) + } + } + +- if (create_thread(queue_id, "gazellekernel", gazelle_kernelevent_thread) != 0) { ++ if (create_thread(queue_id, "gazellekernel", kernel_wait_thread) != 0) { + LSTACK_LOG(ERR, LSTACK, "gazellekernel errno=%d\n", errno); + return -1; + } +@@ -390,27 +364,6 @@ END: + return NULL; + } + +-static void wakeup_kernel_event(struct protocol_stack *stack) +-{ +- if (stack->kernel_event_num == 0) { +- return; +- } +- +- for (int32_t i = 0; i < stack->kernel_event_num; i++) { +- struct wakeup_poll *wakeup = stack->kernel_events[i].data.ptr; +- if (wakeup->type == WAKEUP_CLOSE) { +- continue; +- } +- +- __atomic_store_n(&wakeup->have_kernel_event, true, __ATOMIC_RELEASE); +- if (list_is_null(&wakeup->wakeup_list[stack->queue_id])) { +- list_add_node(&stack->wakeup_list, &wakeup->wakeup_list[stack->queue_id]); +- } +- } +- +- stack->kernel_event_num = 0; +-} +- + void stack_send_pkts(struct protocol_stack *stack) + { + uint32_t send_num = stack->send_end - stack->send_start; +@@ -492,7 +445,7 @@ static void* gazelle_stack_thread(void *arg) + read_recv_list(stack, read_connect_number); + + if ((wakeup_tick & 0xf) == 0) { +- wakeup_kernel_event(stack); ++ wakeup_kernel_wait(&stack->knwait, &stack->wakeup_list, stack->queue_id); + wakeup_stack_epoll(stack, wakeup_thread_enable); + } + +diff --git a/src/lstack/include/lstack_kernel_wait.h b/src/lstack/include/lstack_kernel_wait.h +new file mode 100644 +index 0000000..76bb823 +--- /dev/null ++++ b/src/lstack/include/lstack_kernel_wait.h +@@ -0,0 +1,35 @@ ++/* ++* Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. ++* gazelle is licensed under the Mulan PSL v2. ++* You can use this software according to the terms and conditions of the Mulan PSL v2. ++* You may obtain a copy of Mulan PSL v2 at: ++* http://license.coscl.org.cn/MulanPSL2 ++* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR ++* IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR ++* PURPOSE. ++* See the Mulan PSL v2 for more details. ++*/ ++ ++#ifndef _GAZELLE_KERNEL_WAIT_H_ ++#define _GAZELLE_KERNEL_WAIT_H_ ++ ++#include ++#include ++ ++#include "gazelle_opt.h" ++ ++struct wakeup_poll; ++struct kernel_wait { ++ /* kernel event thread epoll fd */ ++ int32_t epollfd; ++ /* kernel event thread read/write frequently */ ++ int32_t kernel_event_num; ++ struct epoll_event kernel_events[KERNEL_EPOLL_MAX]; ++}; ++ ++void kernel_wait_change_thread(struct wakeup_poll *wakeup, struct kernel_wait *old_knwait, ++ struct kernel_wait *new_knwait); ++void wakeup_kernel_wait(struct kernel_wait *knwait, struct list_node *wakeup_list, uint16_t queue_id); ++void* kernel_wait_thread(void *arg); ++ ++#endif /* _GAZELLE_KERNEL_WAIT_H_ */ +diff --git a/src/lstack/include/lstack_protocol_stack.h b/src/lstack/include/lstack_protocol_stack.h +index b093362..0e79c93 100644 +--- a/src/lstack/include/lstack_protocol_stack.h ++++ b/src/lstack/include/lstack_protocol_stack.h +@@ -24,6 +24,7 @@ + #include "lstack_lockless_queue.h" + #include "lstack_ethdev.h" + #include "gazelle_opt.h" ++#include "lstack_kernel_wait.h" + + #define SOCK_RECV_RING_SIZE (128) + #define SOCK_RECV_FREE_THRES (32) +@@ -50,7 +51,6 @@ struct protocol_stack { + uint16_t socket_id; + uint16_t cpu_id; + cpu_set_t idle_cpuset; /* idle cpu in numa of stack, app thread bind to it */ +- int32_t epollfd; /* kernel event thread epoll fd */ + + struct rte_mempool *rxtx_pktmbuf_pool; + struct rte_ring *rx_ring; +@@ -64,9 +64,7 @@ struct protocol_stack { + lockless_queue rpc_queue __rte_cache_aligned; + char pad __rte_cache_aligned; + +- /* kernel event thread read/write frequently */ +- struct epoll_event kernel_events[KERNEL_EPOLL_MAX]; +- int32_t kernel_event_num; ++ struct kernel_wait knwait; + char pad1 __rte_cache_aligned; + + struct netif netif; +-- +2.33.0 + diff --git a/gazelle.spec b/gazelle.spec index b16ba8b..917d7dc 100644 --- a/gazelle.spec +++ b/gazelle.spec @@ -2,7 +2,7 @@ Name: gazelle Version: 1.0.1 -Release: 51 +Release: 52 Summary: gazelle is a high performance user-mode stack License: MulanPSL-2.0 URL: https://gitee.com/openeuler/gazelle @@ -212,6 +212,7 @@ Patch9194: 0194-eneble-TSO-and-fix-TSO-mbuf-pktlen-error.patch Patch9195: 0195-adapt-unsupport-sock-optname.patch Patch9196: 0196-reduce-duplicate-code-in-lstack_cfg.c.patch Patch9197: 0197-refactor-mbuf-private-data.patch +Patch9198: 0198-refactor-kernel_wait-of-epoll.patch %description %{name} is a high performance user-mode stack. @@ -252,6 +253,9 @@ install -Dpm 0640 %{_builddir}/%{name}-%{version}/src/ltran/ltran.conf %{b %config(noreplace) %{conf_path}/ltran.conf %changelog +* Wed Mar 15 2023 Lemmy Huang - 1.0.1-52 +- refactor kernel_wait of epoll + * Wed Mar 15 2023 Lemmy Huang - 1.0.1-51 - refactor mbuf private data