refactor kernel_wait of epoll

Signed-off-by: Lemmy Huang <huangliming5@huawei.com>
This commit is contained in:
Lemmy Huang 2023-03-15 16:37:31 +08:00
parent 18d30d4405
commit 30693a54e9
2 changed files with 447 additions and 1 deletions

View File

@ -0,0 +1,442 @@
From 94f2e0c57d2668666f868cb32984e7833be8f296 Mon Sep 17 00:00:00 2001
From: Lemmy Huang <huangliming5@huawei.com>
Date: Wed, 15 Mar 2023 11:17:23 +0800
Subject: [PATCH 2/2] refactor kernel_wait of epoll
Signed-off-by: Lemmy Huang <huangliming5@huawei.com>
---
src/common/gazelle_dfx_msg.h | 5 +-
src/common/gazelle_opt.h | 2 +
src/lstack/api/lstack_epoll.c | 42 ++++-------
src/lstack/core/dir.mk | 3 +-
src/lstack/core/lstack_kernel_wait.c | 86 ++++++++++++++++++++++
src/lstack/core/lstack_protocol_stack.c | 51 +------------
src/lstack/include/lstack_kernel_wait.h | 35 +++++++++
src/lstack/include/lstack_protocol_stack.h | 6 +-
8 files changed, 147 insertions(+), 83 deletions(-)
create mode 100644 src/lstack/core/lstack_kernel_wait.c
create mode 100644 src/lstack/include/lstack_kernel_wait.h
diff --git a/src/common/gazelle_dfx_msg.h b/src/common/gazelle_dfx_msg.h
index 674f2d7..7f75d12 100644
--- a/src/common/gazelle_dfx_msg.h
+++ b/src/common/gazelle_dfx_msg.h
@@ -17,12 +17,11 @@
#include <stdint.h>
#include <sys/stat.h>
+#include "gazelle_opt.h"
+
#define GAZELLE_CLIENT_NUM_MIN 1
#define GAZELLE_LOG_LEVEL_MAX 10
-/* maybe it should be consistent with MEMP_NUM_TCP_PCB */
-#define GAZELLE_LSTACK_MAX_CONN (20000 + 2000) // same as MAX_CLIENTS + RESERVED_CLIENTS in lwipopts.h
-
enum GAZELLE_STAT_MODE {
GAZELLE_STAT_LTRAN_SHOW = 0,
GAZELLE_STAT_LTRAN_SHOW_RATE,
diff --git a/src/common/gazelle_opt.h b/src/common/gazelle_opt.h
index 76b89ce..fb903af 100644
--- a/src/common/gazelle_opt.h
+++ b/src/common/gazelle_opt.h
@@ -25,6 +25,8 @@
#define PROTOCOL_STACK_MAX 32
#define KERNEL_EPOLL_MAX 512
+/* must stay consistent with MEMP_NUM_TCP_PCB in lwipopts.h (MAX_CLIENTS + RESERVED_CLIENTS) */
+#define GAZELLE_LSTACK_MAX_CONN (20000 + 2000)
#define ETHER_ADDR_LEN 6
diff --git a/src/lstack/api/lstack_epoll.c b/src/lstack/api/lstack_epoll.c
index da29590..39cc3de 100644
--- a/src/lstack/api/lstack_epoll.c
+++ b/src/lstack/api/lstack_epoll.c
@@ -35,6 +35,7 @@
#include "lstack_lwip.h"
#include "lstack_protocol_stack.h"
#include "posix/lstack_epoll.h"
+#include "lstack_kernel_wait.h"
#define EPOLL_KERNEL_INTERVAL 10 /* ms */
#define SEC_TO_NSEC 1000000000
@@ -43,8 +44,6 @@
#define POLL_KERNEL_EVENTS 32
static void update_epoll_max_stack(struct wakeup_poll *wakeup);
-static void change_epollfd_kernel_thread(struct wakeup_poll *wakeup, struct protocol_stack *old_stack,
- struct protocol_stack *new_stack);
void add_sock_event(struct lwip_sock *sock, uint32_t event)
{
@@ -167,6 +166,11 @@ static void raise_pending_events(struct wakeup_poll *wakeup, struct lwip_sock *s
}
}
+static struct kernel_wait *get_stack_knwait(struct protocol_stack *stack)
+{
+ return stack == NULL ? NULL : &stack->knwait;
+}
+
int32_t lstack_do_epoll_create(int32_t fd)
{
if (fd < 0) {
@@ -213,7 +217,8 @@ int32_t lstack_do_epoll_create(int32_t fd)
sock->wakeup = wakeup;
update_epoll_max_stack(wakeup);
- change_epollfd_kernel_thread(wakeup, wakeup->bind_stack, wakeup->max_stack);
+ kernel_wait_change_thread(wakeup, get_stack_knwait(wakeup->bind_stack),
+ get_stack_knwait(wakeup->max_stack));
wakeup->bind_stack = wakeup->max_stack;
if (get_global_cfg_params()->app_bind_numa) {
bind_to_stack_numa(wakeup->bind_stack);
@@ -431,29 +436,12 @@ static int32_t poll_lwip_event(struct pollfd *fds, nfds_t nfds)
return event_num;
}
-static void change_epollfd_kernel_thread(struct wakeup_poll *wakeup, struct protocol_stack *old_stack,
- struct protocol_stack *new_stack)
-{
- if (old_stack) {
- if (posix_api->epoll_ctl_fn(old_stack->epollfd, EPOLL_CTL_DEL, wakeup->epollfd, NULL) != 0) {
- LSTACK_LOG(ERR, LSTACK, "epoll_ctl_fn errno=%d\n", errno);
- }
- }
-
- /* avoid kernel thread post too much, use EPOLLET */
- struct epoll_event event;
- event.data.ptr = wakeup;
- event.events = EPOLLIN | EPOLLOUT | EPOLLERR | EPOLLHUP | EPOLLET;
- if (posix_api->epoll_ctl_fn(new_stack->epollfd, EPOLL_CTL_ADD, wakeup->epollfd, &event) != 0) {
- LSTACK_LOG(ERR, LSTACK, "epoll_ctl_fn errno=%d\n", errno);
- }
-}
-
static void epoll_bind_statck(struct wakeup_poll *wakeup)
{
if (wakeup->bind_stack != wakeup->max_stack && wakeup->max_stack) {
bind_to_stack_numa(wakeup->max_stack);
- change_epollfd_kernel_thread(wakeup, wakeup->bind_stack, wakeup->max_stack);
+ kernel_wait_change_thread(wakeup, get_stack_knwait(wakeup->bind_stack),
+ get_stack_knwait(wakeup->max_stack));
wakeup->bind_stack = wakeup->max_stack;
}
}
@@ -555,7 +543,8 @@ static int32_t init_poll_wakeup_data(struct wakeup_poll *wakeup)
int32_t stack_count[PROTOCOL_STACK_MAX] = {0};
uint16_t bind_id = find_max_cnt_stack(stack_count, stack_group->stack_num, wakeup->bind_stack);
- change_epollfd_kernel_thread(wakeup, wakeup->bind_stack, stack_group->stacks[bind_id]);
+ kernel_wait_change_thread(wakeup, get_stack_knwait(wakeup->bind_stack),
+ get_stack_knwait(stack_group->stacks[bind_id]));
wakeup->bind_stack = stack_group->stacks[bind_id];
if (get_global_cfg_params()->app_bind_numa) {
bind_to_stack_numa(wakeup->bind_stack);
@@ -585,7 +574,7 @@ static void resize_kernel_poll(struct wakeup_poll *wakeup, nfds_t nfds)
wakeup->last_max_nfds = nfds;
}
-static void poll_bind_statck(struct wakeup_poll *wakeup, int32_t *stack_count)
+static void poll_bind_stack(struct wakeup_poll *wakeup, int32_t *stack_count)
{
struct protocol_stack_group *stack_group = get_protocol_stack_group();
@@ -594,7 +583,8 @@ static void poll_bind_statck(struct wakeup_poll *wakeup, int32_t *stack_count)
return;
}
- change_epollfd_kernel_thread(wakeup, wakeup->bind_stack, stack_group->stacks[bind_id]);
+ kernel_wait_change_thread(wakeup, get_stack_knwait(wakeup->bind_stack),
+ get_stack_knwait(stack_group->stacks[bind_id]));
bind_to_stack_numa(stack_group->stacks[bind_id]);
wakeup->bind_stack = stack_group->stacks[bind_id];
}
@@ -663,7 +653,7 @@ static void poll_init(struct wakeup_poll *wakeup, struct pollfd *fds, nfds_t nfd
wakeup->last_nfds = nfds;
if (get_global_cfg_params()->app_bind_numa) {
- poll_bind_statck(wakeup, stack_count);
+ poll_bind_stack(wakeup, stack_count);
}
}
diff --git a/src/lstack/core/dir.mk b/src/lstack/core/dir.mk
index 88c1e08..0908f73 100644
--- a/src/lstack/core/dir.mk
+++ b/src/lstack/core/dir.mk
@@ -8,6 +8,7 @@
# PURPOSE.
# See the Mulan PSL v2 for more details.
-SRC = lstack_init.c lstack_cfg.c lstack_dpdk.c lstack_control_plane.c lstack_stack_stat.c lstack_lwip.c lstack_protocol_stack.c lstack_thread_rpc.c
+SRC = lstack_init.c lstack_cfg.c lstack_dpdk.c lstack_control_plane.c lstack_stack_stat.c lstack_lwip.c \
+ lstack_protocol_stack.c lstack_thread_rpc.c lstack_kernel_wait.c
$(eval $(call register_dir, core, $(SRC)))
diff --git a/src/lstack/core/lstack_kernel_wait.c b/src/lstack/core/lstack_kernel_wait.c
new file mode 100644
index 0000000..9da4f59
--- /dev/null
+++ b/src/lstack/core/lstack_kernel_wait.c
@@ -0,0 +1,86 @@
+/*
+* Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
+* gazelle is licensed under the Mulan PSL v2.
+* You can use this software according to the terms and conditions of the Mulan PSL v2.
+* You may obtain a copy of Mulan PSL v2 at:
+* http://license.coscl.org.cn/MulanPSL2
+* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+* PURPOSE.
+* See the Mulan PSL v2 for more details.
+*/
+
+#include <sys/socket.h>
+#include <lwip/posix_api.h>
+
+#include "lstack_log.h"
+#include "lstack_kernel_wait.h"
+#include "lstack_protocol_stack.h"
+#include "posix/lstack_epoll.h"
+
+#define KERNEL_EVENT_100us 100
+
+void kernel_wait_change_thread(struct wakeup_poll *wakeup, struct kernel_wait *old_knwait,
+ struct kernel_wait *new_knwait)
+{
+ if (old_knwait) {
+ if (posix_api->epoll_ctl_fn(old_knwait->epollfd, EPOLL_CTL_DEL, wakeup->epollfd, NULL) != 0) {
+ LSTACK_LOG(ERR, LSTACK, "epoll_ctl_fn errno=%d\n", errno);
+ }
+ }
+
+ /* register edge-triggered (EPOLLET) so the kernel event thread is not posted too often */
+ struct epoll_event event;
+ event.data.ptr = wakeup;
+ event.events = EPOLLIN | EPOLLOUT | EPOLLERR | EPOLLHUP | EPOLLET;
+ if (posix_api->epoll_ctl_fn(new_knwait->epollfd, EPOLL_CTL_ADD, wakeup->epollfd, &event) != 0) {
+ LSTACK_LOG(ERR, LSTACK, "epoll_ctl_fn errno=%d\n", errno);
+ }
+}
+
+void wakeup_kernel_wait(struct kernel_wait *knwait, struct list_node *wakeup_list, uint16_t queue_id)
+{
+ if (knwait->kernel_event_num == 0) {
+ return;
+ }
+
+ for (int32_t i = 0; i < knwait->kernel_event_num; i++) {
+ struct wakeup_poll *wakeup = knwait->kernel_events[i].data.ptr;
+ if (wakeup->type == WAKEUP_CLOSE) {
+ continue;
+ }
+
+ __atomic_store_n(&wakeup->have_kernel_event, true, __ATOMIC_RELEASE);
+ if (list_is_null(&wakeup->wakeup_list[queue_id])) {
+ list_add_node(wakeup_list, &wakeup->wakeup_list[queue_id]);
+ }
+ }
+
+ knwait->kernel_event_num = 0;
+}
+
+void* kernel_wait_thread(void *arg)
+{
+ uint16_t queue_id = *(uint16_t *)arg;
+ struct protocol_stack *stack = get_protocol_stack_group()->stacks[queue_id];
+ struct kernel_wait *knwait = &stack->knwait;
+
+ bind_to_stack_numa(stack);
+
+ knwait->epollfd = posix_api->epoll_create_fn(GAZELLE_LSTACK_MAX_CONN);
+ if (knwait->epollfd < 0) {
+ LSTACK_LOG(ERR, LSTACK, "epoll_create_fn errno=%d\n", errno);
+ return NULL;
+ }
+
+ LSTACK_LOG(INFO, LSTACK, "kernelevent_%02hu start\n", queue_id);
+
+ for (;;) {
+ knwait->kernel_event_num = posix_api->epoll_wait_fn(knwait->epollfd, knwait->kernel_events, KERNEL_EPOLL_MAX, -1);
+ while (knwait->kernel_event_num > 0) {
+ usleep(KERNEL_EVENT_100us);
+ }
+ }
+
+ return NULL;
+}
\ No newline at end of file
diff --git a/src/lstack/core/lstack_protocol_stack.c b/src/lstack/core/lstack_protocol_stack.c
index 93204d1..d78eec2 100644
--- a/src/lstack/core/lstack_protocol_stack.c
+++ b/src/lstack/core/lstack_protocol_stack.c
@@ -36,8 +36,6 @@
#include "posix/lstack_epoll.h"
#include "lstack_stack_stat.h"
-#define KERNEL_EVENT_100us 100
-
static PER_THREAD struct protocol_stack *g_stack_p = NULL;
static struct protocol_stack_group g_stack_group = {0};
@@ -250,25 +248,6 @@ static void* gazelle_wakeup_thread(void *arg)
return NULL;
}
-static void* gazelle_kernelevent_thread(void *arg)
-{
- uint16_t queue_id = *(uint16_t *)arg;
- struct protocol_stack *stack = get_protocol_stack_group()->stacks[queue_id];
-
- bind_to_stack_numa(stack);
-
- LSTACK_LOG(INFO, LSTACK, "kernelevent_%02hu start\n", queue_id);
-
- for (;;) {
- stack->kernel_event_num = posix_api->epoll_wait_fn(stack->epollfd, stack->kernel_events, KERNEL_EPOLL_MAX, -1);
- while (stack->kernel_event_num > 0) {
- usleep(KERNEL_EVENT_100us);
- }
- }
-
- return NULL;
-}
-
static int32_t init_stack_value(struct protocol_stack *stack, uint16_t queue_id)
{
struct protocol_stack_group *stack_group = get_protocol_stack_group();
@@ -288,11 +267,6 @@ static int32_t init_stack_value(struct protocol_stack *stack, uint16_t queue_id)
stack_group->stacks[queue_id] = stack;
set_stack_idx(queue_id);
- stack->epollfd = posix_api->epoll_create_fn(GAZELLE_LSTACK_MAX_CONN);
- if (stack->epollfd < 0) {
- return -1;
- }
-
stack->socket_id = numa_node_of_cpu(stack->cpu_id);
if (stack->socket_id < 0) {
LSTACK_LOG(ERR, LSTACK, "numa_node_of_cpu failed\n");
@@ -327,7 +301,7 @@ static int32_t create_affiliate_thread(uint16_t queue_id, bool wakeup_enable)
}
}
- if (create_thread(queue_id, "gazellekernel", gazelle_kernelevent_thread) != 0) {
+ if (create_thread(queue_id, "gazellekernel", kernel_wait_thread) != 0) {
LSTACK_LOG(ERR, LSTACK, "gazellekernel errno=%d\n", errno);
return -1;
}
@@ -390,27 +364,6 @@ END:
return NULL;
}
-static void wakeup_kernel_event(struct protocol_stack *stack)
-{
- if (stack->kernel_event_num == 0) {
- return;
- }
-
- for (int32_t i = 0; i < stack->kernel_event_num; i++) {
- struct wakeup_poll *wakeup = stack->kernel_events[i].data.ptr;
- if (wakeup->type == WAKEUP_CLOSE) {
- continue;
- }
-
- __atomic_store_n(&wakeup->have_kernel_event, true, __ATOMIC_RELEASE);
- if (list_is_null(&wakeup->wakeup_list[stack->queue_id])) {
- list_add_node(&stack->wakeup_list, &wakeup->wakeup_list[stack->queue_id]);
- }
- }
-
- stack->kernel_event_num = 0;
-}
-
void stack_send_pkts(struct protocol_stack *stack)
{
uint32_t send_num = stack->send_end - stack->send_start;
@@ -492,7 +445,7 @@ static void* gazelle_stack_thread(void *arg)
read_recv_list(stack, read_connect_number);
if ((wakeup_tick & 0xf) == 0) {
- wakeup_kernel_event(stack);
+ wakeup_kernel_wait(&stack->knwait, &stack->wakeup_list, stack->queue_id);
wakeup_stack_epoll(stack, wakeup_thread_enable);
}
diff --git a/src/lstack/include/lstack_kernel_wait.h b/src/lstack/include/lstack_kernel_wait.h
new file mode 100644
index 0000000..76bb823
--- /dev/null
+++ b/src/lstack/include/lstack_kernel_wait.h
@@ -0,0 +1,35 @@
+/*
+* Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
+* gazelle is licensed under the Mulan PSL v2.
+* You can use this software according to the terms and conditions of the Mulan PSL v2.
+* You may obtain a copy of Mulan PSL v2 at:
+* http://license.coscl.org.cn/MulanPSL2
+* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+* PURPOSE.
+* See the Mulan PSL v2 for more details.
+*/
+
+#ifndef _GAZELLE_KERNEL_WAIT_H_
+#define _GAZELLE_KERNEL_WAIT_H_
+
+#include <sys/epoll.h>
+#include <lwip/list.h>
+
+#include "gazelle_opt.h"
+
+struct wakeup_poll;
+struct kernel_wait {
+ /* epoll fd that the kernel event thread waits on with epoll_wait */
+ int32_t epollfd;
+ /* filled by kernel_wait_thread(); consumed and reset to 0 by wakeup_kernel_wait() */
+ int32_t kernel_event_num;
+ struct epoll_event kernel_events[KERNEL_EPOLL_MAX];
+};
+
+void kernel_wait_change_thread(struct wakeup_poll *wakeup, struct kernel_wait *old_knwait,
+ struct kernel_wait *new_knwait);
+void wakeup_kernel_wait(struct kernel_wait *knwait, struct list_node *wakeup_list, uint16_t queue_id);
+void* kernel_wait_thread(void *arg);
+
+#endif /* _GAZELLE_KERNEL_WAIT_H_ */
diff --git a/src/lstack/include/lstack_protocol_stack.h b/src/lstack/include/lstack_protocol_stack.h
index b093362..0e79c93 100644
--- a/src/lstack/include/lstack_protocol_stack.h
+++ b/src/lstack/include/lstack_protocol_stack.h
@@ -24,6 +24,7 @@
#include "lstack_lockless_queue.h"
#include "lstack_ethdev.h"
#include "gazelle_opt.h"
+#include "lstack_kernel_wait.h"
#define SOCK_RECV_RING_SIZE (128)
#define SOCK_RECV_FREE_THRES (32)
@@ -50,7 +51,6 @@ struct protocol_stack {
uint16_t socket_id;
uint16_t cpu_id;
cpu_set_t idle_cpuset; /* idle cpu in numa of stack, app thread bind to it */
- int32_t epollfd; /* kernel event thread epoll fd */
struct rte_mempool *rxtx_pktmbuf_pool;
struct rte_ring *rx_ring;
@@ -64,9 +64,7 @@ struct protocol_stack {
lockless_queue rpc_queue __rte_cache_aligned;
char pad __rte_cache_aligned;
- /* kernel event thread read/write frequently */
- struct epoll_event kernel_events[KERNEL_EPOLL_MAX];
- int32_t kernel_event_num;
+ struct kernel_wait knwait;
char pad1 __rte_cache_aligned;
struct netif netif;
--
2.33.0

View File

@ -2,7 +2,7 @@
Name: gazelle
Version: 1.0.1
Release: 51
Release: 52
Summary: gazelle is a high performance user-mode stack
License: MulanPSL-2.0
URL: https://gitee.com/openeuler/gazelle
@ -212,6 +212,7 @@ Patch9194: 0194-eneble-TSO-and-fix-TSO-mbuf-pktlen-error.patch
Patch9195: 0195-adapt-unsupport-sock-optname.patch
Patch9196: 0196-reduce-duplicate-code-in-lstack_cfg.c.patch
Patch9197: 0197-refactor-mbuf-private-data.patch
Patch9198: 0198-refactor-kernel_wait-of-epoll.patch
%description
%{name} is a high performance user-mode stack.
@ -252,6 +253,9 @@ install -Dpm 0640 %{_builddir}/%{name}-%{version}/src/ltran/ltran.conf %{b
%config(noreplace) %{conf_path}/ltran.conf
%changelog
* Wed Mar 15 2023 Lemmy Huang <huangliming5@huawei.com> - 1.0.1-52
- refactor kernel_wait of epoll
* Wed Mar 15 2023 Lemmy Huang <huangliming5@huawei.com> - 1.0.1-51
- refactor mbuf private data