From 10eb6041c9a3e632b9fc2e769d7e726fbf7c9dd8 Mon Sep 17 00:00:00 2001 From: yangchen Date: Sat, 2 Nov 2024 16:03:54 +0800 Subject: [PATCH] xdp: support bind no cpu mode --- src/common/gazelle_base_func.h | 8 + src/common/gazelle_reg_msg.h | 21 +- src/lstack/api/lstack_rtw_api.c | 3 - src/lstack/api/lstack_wrap.c | 2 +- src/lstack/core/lstack_cfg.c | 303 +++++++++++---------- src/lstack/core/lstack_dpdk.c | 64 ++++- src/lstack/core/lstack_protocol_stack.c | 113 +++----- src/lstack/include/lstack_cfg.h | 10 +- src/lstack/include/lstack_dpdk.h | 2 + src/lstack/include/lstack_protocol_stack.h | 3 +- src/lstack/include/lstack_thread_rpc.h | 2 + src/lstack/netif/lstack_flow.c | 6 +- 12 files changed, 289 insertions(+), 248 deletions(-) diff --git a/src/common/gazelle_base_func.h b/src/common/gazelle_base_func.h index be87ccd..a579cd4 100644 --- a/src/common/gazelle_base_func.h +++ b/src/common/gazelle_base_func.h @@ -13,6 +13,8 @@ #ifndef __GAZELLE_BASE_FUNC_H__ #define __GAZELLE_BASE_FUNC_H__ +#include + #define GAZELLE_FREE(p) do { \ if (p) { \ free(p); \ @@ -28,6 +30,12 @@ #define NODE_ENTRY(node, type, member) \ ((type*)((char*)(node) - (size_t)&((type*)0)->member)) +#define MB_IN_BYTES (1024 * 1024) +static inline int bytes_to_mb(uint32_t bytes) +{ + return ceil((double)bytes / MB_IN_BYTES); +} + int32_t separate_str_to_array(char *args, uint32_t *array, int32_t array_size, int32_t max_value); int32_t check_and_set_run_dir(void); diff --git a/src/common/gazelle_reg_msg.h b/src/common/gazelle_reg_msg.h index d849cea..f9de32c 100644 --- a/src/common/gazelle_reg_msg.h +++ b/src/common/gazelle_reg_msg.h @@ -17,19 +17,22 @@ #include "gazelle_opt.h" -#define NULL_CLIENT_IP UINT32_MAX -#define NULL_CLIENT_PORT UINT16_MAX +#define NULL_CLIENT_IP UINT32_MAX +#define NULL_CLIENT_PORT UINT16_MAX -#define GAZELLE_MAX_REG_ARGS 32 +#define GAZELLE_MAX_REG_ARGS 32 #define ENQUEUE_RING_RETRY_TIMEOUT 10 // ms -#define OPT_BASE_VIRTADDR "--base-virtaddr" -#define OPT_FILE_PREFIX "--file-prefix" -#define OPT_SOCKET_MEM "--socket-mem" -#define OPT_LEGACY_MEM "--legacy-mem" -#define OPT_HUGE_DIR "--huge-dir" -#define OPT_BIND_CORELIST "-l" +#define OPT_BASE_VIRTADDR "--base-virtaddr" +#define OPT_FILE_PREFIX "--file-prefix" +#define OPT_SOCKET_MEM "--socket-mem" +#define OPT_LEGACY_MEM "--legacy-mem" +#define OPT_HUGE_DIR "--huge-dir" +#define OPT_BIND_CORELIST "-l" + +#define GAZELLE_MAX_NUMA_NODES 4 +#define SOCKET_MEM_STRLEN (GAZELLE_MAX_NUMA_NODES * 10) /* types for msg from lstack to ltran */ enum response_type { diff --git a/src/lstack/api/lstack_rtw_api.c b/src/lstack/api/lstack_rtw_api.c index eab379c..1b02e2a 100644 --- a/src/lstack/api/lstack_rtw_api.c +++ b/src/lstack/api/lstack_rtw_api.c @@ -235,9 +235,6 @@ static int stack_broadcast_listen(int fd, int backlog) #endif for (int32_t i = 0; i < stack_group->stack_num; ++i) { stack = stack_group->stacks[i]; - if (get_global_cfg_params()->seperate_send_recv && stack->is_send_thread) { - continue; - } if (stack != cur_stack) { clone_fd = rpc_call_shadow_fd(&stack->rpc_queue, fd, (struct sockaddr *)&addr, addr_len); if (clone_fd < 0) { diff --git a/src/lstack/api/lstack_wrap.c b/src/lstack/api/lstack_wrap.c index 66cea51..c228cab 100644 --- a/src/lstack/api/lstack_wrap.c +++ b/src/lstack/api/lstack_wrap.c @@ -393,7 +393,7 @@ static inline int32_t do_getsockopt(int32_t s, int32_t level, int32_t optname, v #define SO_NUMA_ID 0x100c if (select_sock_posix_path(lwip_get_socket(s)) == POSIX_LWIP && !unsupport_optname(level, optname)) { if (level == IPPROTO_IP && optname == SO_NUMA_ID) { - return lwip_get_socket(s)->stack->socket_id; + return lwip_get_socket(s)->stack->numa_id; } return g_wrap_api->getsockopt_fn(s, level, optname, optval, optlen); } diff --git a/src/lstack/core/lstack_cfg.c b/src/lstack/core/lstack_cfg.c index 4c8f066..43482af 100644 --- a/src/lstack/core/lstack_cfg.c +++ b/src/lstack/core/lstack_cfg.c @@ -22,14 +22,18 @@ #include #include #include +#include #include +#include +#include #include #include #include "common/gazelle_reg_msg.h" #include "common/gazelle_base_func.h" #include "lstack_log.h" +#include "lstack_dpdk.h" #include "lstack_cfg.h" #define DEFAULT_CONF_FILE "/etc/gazelle/lstack.conf" @@ -70,7 +74,6 @@ static int32_t parse_recv_ring_size(void); static int32_t parse_num_process(void); static int32_t parse_process_numa(void); static int32_t parse_process_index(void); -static int32_t parse_seperate_sendrecv_args(void); static int32_t parse_tuple_filter(void); static int32_t parse_bond_mode(void); static int32_t parse_bond_miimon(void); @@ -85,6 +88,7 @@ static int32_t parse_rpc_msg_max(void); static int32_t parse_send_cache_mode(void); static int32_t parse_flow_bifurcation(void); static int32_t parse_stack_interrupt(void); +static int32_t parse_stack_num(void); #define PARSE_ARG(_arg, _arg_string, _default_val, _min_val, _max_val, _ret) \ do { \ @@ -118,9 +122,16 @@ static struct config_vector_t g_config_tbl[] = { { "mask_addr", parse_mask_addr }, { "use_ltran", parse_use_ltran }, { "devices", parse_devices }, - { "dpdk_args", parse_dpdk_args }, - { "seperate_send_recv", parse_seperate_sendrecv_args }, + { "tcp_conn_count", parse_tcp_conn_count }, + { "mbuf_count_per_conn", parse_mbuf_count_per_conn }, + { "nic_rxqueue_size", parse_nic_rxqueue_size}, + { "nic_txqueue_size", parse_nic_txqueue_size}, + { "send_ring_size", parse_send_ring_size }, + { "recv_ring_size", parse_recv_ring_size }, + { "rpc_msg_max", parse_rpc_msg_max }, + { "stack_num", parse_stack_num }, { "num_cpus", parse_stack_cpu_number }, + { "dpdk_args", parse_dpdk_args }, { "low_power_mode", parse_low_power_mode }, { "kni_switch", parse_kni_switch }, { "listen_shadow", parse_listen_shadow }, @@ -128,13 +139,9 @@ static struct config_vector_t g_config_tbl[] = { { "app_exclude_cpus", parse_app_exclude_cpus }, { "main_thread_affinity", parse_main_thread_affinity }, { "unix_prefix", parse_unix_prefix }, - { "tcp_conn_count", parse_tcp_conn_count }, - { "mbuf_count_per_conn", parse_mbuf_count_per_conn }, { "read_connect_number", parse_read_connect_number }, { "rpc_number", parse_rpc_number }, { "nic_read_number", parse_nic_read_number }, - { "send_ring_size", parse_send_ring_size }, - { "recv_ring_size", parse_recv_ring_size }, { "num_process", parse_num_process }, { "process_numa", parse_process_numa }, { "process_idx", parse_process_index }, @@ -144,11 +151,8 @@ static struct config_vector_t g_config_tbl[] = { { "bond_slave_mac", parse_bond_slave_mac }, { "use_sockmap", parse_use_sockmap }, { "udp_enable", parse_udp_enable }, - { "nic_rxqueue_size", parse_nic_rxqueue_size}, - { "nic_txqueue_size", parse_nic_txqueue_size}, { "stack_thread_mode", parse_stack_thread_mode }, { "nic_vlan_mode", parse_nic_vlan_mode }, - { "rpc_msg_max", parse_rpc_msg_max }, { "send_cache_mode", parse_send_cache_mode }, { "flow_bifurcation", parse_flow_bifurcation}, { "stack_interrupt", parse_stack_interrupt}, @@ -354,135 +358,80 @@ static int32_t get_param_idx(int32_t argc, char **argv, const char *param) return -1; } -static bool have_corelist_arg(int32_t argc, char **argv) +static int32_t stack_bind_no_cpu(void) { - for (uint32_t i = 0; i < argc; i++) { - if (strncmp(argv[i], OPT_BIND_CORELIST, strlen(OPT_BIND_CORELIST)) == 0) { - return true; - } + uint16_t numa_id = 0; + + /* launch a lstack thread when neither num_cpus nor stack_num is specified */ + if (g_config_params.stack_num == 0) { + g_config_params.stack_num = 1; + } - if (strncmp(argv[i], "--lcores", strlen("--lcores")) == 0) { - return true; - } - - if (strncmp(argv[i], "-c", strlen("-c")) == 0) { - return true; - } + numa_id = numa_node_of_cpu(sched_getcpu()); + if (numa_id < 0) { + return -EINVAL; + } - if (strncmp(argv[i], "-s", strlen("-s")) == 0) { - return true; - } + g_config_params.numa_id = numa_id; + g_config_params.num_cpu = g_config_params.stack_num; + g_config_params.num_queue = g_config_params.num_cpu; + g_config_params.tot_queue_num = g_config_params.num_queue; - if (strncmp(argv[i], "-S", strlen("-S")) == 0) { - return true; - } - } + LSTACK_PRE_LOG(LSTACK_INFO, "NUMA node: %d\n", g_config_params.numa_id); - return false; + return 0; } -static int32_t parse_stack_cpu_number(void) + +static int32_t stack_bind_cpus(void) { - const config_setting_t *num_cpus = NULL; + int cnt = 0; + char *tmp_arg = NULL; const char *args = NULL; + const config_setting_t *num_cpus = NULL; - if (!g_config_params.seperate_send_recv) { - num_cpus = config_lookup(&g_config, "num_cpus"); - if (num_cpus == NULL) { - return -EINVAL; - } - - args = config_setting_get_string(num_cpus); - if (args == NULL) { - return -EINVAL; - } - - if (!have_corelist_arg(g_config_params.dpdk_argc, g_config_params.dpdk_argv)) { - int32_t idx = get_param_idx(g_config_params.dpdk_argc, g_config_params.dpdk_argv, OPT_BIND_CORELIST); - if (idx < 0) { - g_config_params.dpdk_argv[g_config_params.dpdk_argc] = strdup_assert_return(OPT_BIND_CORELIST); - g_config_params.dpdk_argc++; - - g_config_params.dpdk_argv[g_config_params.dpdk_argc] = strdup_assert_return(args); - g_config_params.dpdk_argc++; - } - } - - char *tmp_arg = strdup_assert_return(args); - int32_t cnt = separate_str_to_array(tmp_arg, g_config_params.cpus, CFG_MAX_CPUS, CFG_MAX_CPUS); - free(tmp_arg); - if (cnt <= 0 || cnt > CFG_MAX_CPUS) { - return -EINVAL; - } - - g_config_params.num_cpu = cnt; - g_config_params.num_queue = (uint16_t)cnt; - g_config_params.tot_queue_num = g_config_params.num_queue; - } else { - // send_num_cpus - num_cpus = config_lookup(&g_config, "send_num_cpus"); - if (num_cpus == NULL) { - return -EINVAL; - } - - args = config_setting_get_string(num_cpus); - if (args == NULL) { - return -EINVAL; - } - - if (!have_corelist_arg(g_config_params.dpdk_argc, g_config_params.dpdk_argv)) { - int32_t idx = get_param_idx(g_config_params.dpdk_argc, g_config_params.dpdk_argv, OPT_BIND_CORELIST); - if (idx < 0) { - g_config_params.dpdk_argv[g_config_params.dpdk_argc] = strdup_assert_return(OPT_BIND_CORELIST); - g_config_params.dpdk_argc++; - - g_config_params.dpdk_argv[g_config_params.dpdk_argc] = strdup_assert_return(args); - g_config_params.dpdk_argc++; - } - } - - char *tmp_arg_send = strdup_assert_return(args); - int32_t send_cpu_cnt = separate_str_to_array(tmp_arg_send, g_config_params.send_cpus, - CFG_MAX_CPUS, CFG_MAX_CPUS); - free(tmp_arg_send); - - // recv_num_cpus - num_cpus = config_lookup(&g_config, "recv_num_cpus"); - if (num_cpus == NULL) { - return -EINVAL; - } + num_cpus = config_lookup(&g_config, "num_cpus"); + if (num_cpus == NULL) { + return stack_bind_no_cpu(); + } - args = config_setting_get_string(num_cpus); - if (args == NULL) { - return -EINVAL; - } + args = config_setting_get_string(num_cpus); + if (args == NULL) { + return -EINVAL; + } - if (!have_corelist_arg(g_config_params.dpdk_argc, g_config_params.dpdk_argv)) { - int32_t idx = get_param_idx(g_config_params.dpdk_argc, g_config_params.dpdk_argv, OPT_BIND_CORELIST); - if (idx < 0) { - g_config_params.dpdk_argv[g_config_params.dpdk_argc] = strdup_assert_return(OPT_BIND_CORELIST); - g_config_params.dpdk_argc++; + strcpy(g_config_params.lcores, args); - g_config_params.dpdk_argv[g_config_params.dpdk_argc] = strdup_assert_return(args); - g_config_params.dpdk_argc++; - } - } + tmp_arg = strdup_assert_return(args); + cnt = separate_str_to_array(tmp_arg, g_config_params.cpus, CFG_MAX_CPUS, CFG_MAX_CPUS); + free(tmp_arg); + if (cnt <= 0) { + return stack_bind_no_cpu(); + } else if (cnt > CFG_MAX_CPUS) { + return -EINVAL; + } - char *tmp_arg_recv = strdup_assert_return(args); - int32_t recv_cpu_cnt = separate_str_to_array(tmp_arg_recv, g_config_params.recv_cpus, - CFG_MAX_CPUS, CFG_MAX_CPUS); - free(tmp_arg_recv); + g_config_params.num_cpu = cnt; + g_config_params.num_queue = (uint16_t)cnt; + g_config_params.tot_queue_num = g_config_params.num_queue; - if (send_cpu_cnt <= 0 || send_cpu_cnt > CFG_MAX_CPUS / 2 || send_cpu_cnt != recv_cpu_cnt) { - return -EINVAL; - } + return 0; +} - g_config_params.num_cpu = send_cpu_cnt; - g_config_params.num_queue = (uint16_t)send_cpu_cnt * 2; - g_config_params.tot_queue_num = g_config_params.num_queue; +static int32_t parse_stack_cpu_number(void) +{ + if (g_config_params.stack_num > 0) { + return stack_bind_no_cpu(); } - return 0; + return stack_bind_cpus(); +} + +static int32_t parse_stack_num(void) +{ + int32_t ret; + PARSE_ARG(g_config_params.stack_num, "stack_num", 0, 0, 320, ret); + return ret; } static int32_t parse_app_bind_numa(void) @@ -525,12 +474,12 @@ static int32_t parse_app_exclude_cpus(void) return 0; } -static int32_t numa_to_cpusnum(unsigned socket_id, uint32_t *cpulist, int32_t num) +static int32_t numa_to_cpusnum(unsigned numa_id, uint32_t *cpulist, int32_t num) { char path[PATH_MAX] = {0}; char strbuf[PATH_MAX] = {0}; - int32_t ret = snprintf_s(path, sizeof(path), PATH_MAX - 1, NUMA_CPULIST_PATH, socket_id); + int32_t ret = snprintf_s(path, sizeof(path), PATH_MAX - 1, NUMA_CPULIST_PATH, numa_id); if (ret < 0) { LSTACK_LOG(ERR, LSTACK, "snprintf numa_cpulist failed\n"); return -1; @@ -557,7 +506,7 @@ static int32_t stack_idle_cpuset(struct protocol_stack *stack, cpu_set_t *exclud { uint32_t cpulist[CPUS_MAX_NUM]; - int32_t cpunum = numa_to_cpusnum(stack->socket_id, cpulist, CPUS_MAX_NUM); + int32_t cpunum = numa_to_cpusnum(stack->numa_id, cpulist, CPUS_MAX_NUM); if (cpunum <= 0) { LSTACK_LOG(ERR, LSTACK, "numa_to_cpusnum failed\n"); return -1; @@ -584,12 +533,7 @@ int32_t init_stack_numa_cpuset(struct protocol_stack *stack) cpu_set_t stack_cpuset; CPU_ZERO(&stack_cpuset); for (int32_t idx = 0; idx < cfg->num_cpu; ++idx) { - if (!cfg->seperate_send_recv) { - CPU_SET(cfg->cpus[idx], &stack_cpuset); - } else { - CPU_SET(cfg->send_cpus[idx], &stack_cpuset); - CPU_SET(cfg->recv_cpus[idx], &stack_cpuset); - } + CPU_SET(cfg->cpus[idx], &stack_cpuset); } for (int32_t idx = 0; idx < cfg->app_exclude_num_cpu; ++idx) { @@ -831,6 +775,94 @@ int32_t gazelle_param_init(int32_t *argc, char **argv) return 0; } +static bool dpdk_have_corelist(int32_t argc, char **argv) +{ + for (uint32_t i = 0; i < argc; i++) { + if (strncmp(argv[i], OPT_BIND_CORELIST, strlen(OPT_BIND_CORELIST)) == 0) { + return true; + } + + if (strncmp(argv[i], "--lcores", strlen("--lcores")) == 0) { + return true; + } + + if (strncmp(argv[i], "-c", strlen("-c")) == 0) { + return true; + } + + if (strncmp(argv[i], "-s", strlen("-s")) == 0) { + return true; + } + + if (strncmp(argv[i], "-S", strlen("-S")) == 0) { + return true; + } + } + + return false; +} + +static bool dpdk_have_socket_mem(int32_t argc, char **argv) +{ + for (uint32_t i = 0; i < argc; i++) { + if (strncmp(argv[i], OPT_SOCKET_MEM, strlen(OPT_SOCKET_MEM)) == 0) { + return true; + } + } + + return false; +} + +static void dpdk_fill_socket_mem(void) +{ + uint32_t socket_mem_size = dpdk_total_socket_memory(); + + for (uint32_t i = 0; i < GAZELLE_MAX_NUMA_NODES; i++) { + if (i == g_config_params.numa_id) { + snprintf(g_config_params.socket_mem + strlen(g_config_params.socket_mem), + SOCKET_MEM_STRLEN - strlen(g_config_params.socket_mem), "%d", socket_mem_size); + } else { + snprintf(g_config_params.socket_mem + strlen(g_config_params.socket_mem), + SOCKET_MEM_STRLEN - strlen(g_config_params.socket_mem), "%d", 0); + } + if (i < (GAZELLE_MAX_NUMA_NODES - 1)) { + snprintf(g_config_params.socket_mem + strlen(g_config_params.socket_mem), + SOCKET_MEM_STRLEN - strlen(g_config_params.socket_mem), "%s", ","); + } + } +} + +static void dpdk_add_args(void) +{ + int idx; + uint16_t lcore_id; + + if (!dpdk_have_corelist(g_config_params.dpdk_argc, g_config_params.dpdk_argv)) { + if (g_config_params.stack_num > 0) { + RTE_LCORE_FOREACH(lcore_id) { + if (numa_node_of_cpu(lcore_id) == g_config_params.numa_id && rte_lcore_is_enabled(lcore_id)) { + snprintf_s(g_config_params.lcores, sizeof(g_config_params.lcores), + sizeof(g_config_params.lcores) - 1, "%d", lcore_id); + break; + } + } + } + g_config_params.dpdk_argv[g_config_params.dpdk_argc++] = strdup_assert_return(OPT_BIND_CORELIST); + g_config_params.dpdk_argv[g_config_params.dpdk_argc++] = strdup_assert_return(g_config_params.lcores); + } + + if (g_config_params.stack_num > 0) { + dpdk_fill_socket_mem(); + if (!dpdk_have_socket_mem(g_config_params.dpdk_argc, g_config_params.dpdk_argv)) { + g_config_params.dpdk_argv[g_config_params.dpdk_argc++] = strdup_assert_return(OPT_SOCKET_MEM); + g_config_params.dpdk_argv[g_config_params.dpdk_argc++] = strdup_assert_return(g_config_params.socket_mem); + } else { + idx = get_param_idx(g_config_params.dpdk_argc, g_config_params.dpdk_argv, OPT_SOCKET_MEM); + strcpy(g_config_params.dpdk_argv[idx + 1], g_config_params.socket_mem); + } + } +} + static int32_t parse_dpdk_args(void) { int32_t i; @@ -880,8 +912,10 @@ static int32_t parse_dpdk_args(void) (void)fprintf(stderr, "%s ", g_config_params.dpdk_argv[start_index + i]); } (void)fprintf(stderr, "\n"); - g_config_params.dpdk_argc++; + + dpdk_add_args(); + if (turn_args_to_config(g_config_params.dpdk_argc, g_config_params.dpdk_argv)) goto free_dpdk_args; @@ -1106,13 +1140,6 @@ static int32_t parse_unix_prefix(void) return 0; } -static int32_t parse_seperate_sendrecv_args(void) -{ - int32_t ret; - PARSE_ARG(g_config_params.seperate_send_recv, "seperate_send_recv", 0, 0, 1, ret); - return ret; -} - static int32_t parse_num_process(void) { if (g_config_params.use_ltran) { diff --git a/src/lstack/core/lstack_dpdk.c b/src/lstack/core/lstack_dpdk.c index 9294c5b..5141c3a 100644 --- a/src/lstack/core/lstack_dpdk.c +++ b/src/lstack/core/lstack_dpdk.c @@ -42,6 +42,8 @@ #include "lstack_log.h" #include "common/dpdk_common.h" +#include "common/gazelle_base_func.h" +#include "lstack_thread_rpc.h" #include "lstack_protocol_stack.h" #include "lstack_lwip.h" #include "lstack_cfg.h" @@ -107,6 +109,10 @@ int32_t thread_affinity_init(int32_t cpu_id) int32_t ret; cpu_set_t cpuset; + if (get_global_cfg_params()->stack_num > 0) { + return 0; + } + CPU_ZERO(&cpuset); CPU_SET(cpu_id, &cpuset); @@ -546,9 +552,6 @@ int32_t dpdk_ethdev_init(int port_id) int ret; int32_t rss_enable = 0; uint16_t nb_queues = get_global_cfg_params()->num_cpu; - if (get_global_cfg_params()->seperate_send_recv) { - nb_queues = get_global_cfg_params()->num_cpu * 2; - } if (!use_ltran()) { nb_queues = get_global_cfg_params()->tot_queue_num; @@ -616,24 +619,23 @@ int32_t dpdk_ethdev_init(int port_id) static int32_t dpdk_ethdev_setup(const struct eth_params *eth_params, uint16_t idx) { int32_t ret; - + uint16_t numa_id = 0; + struct cfg_params *cfg = get_global_cfg_params(); struct rte_mempool *rxtx_mbuf_pool = get_protocol_stack_group()->total_rxtx_pktmbuf_pool[idx]; - uint16_t socket_id = 0; - struct cfg_params *cfg = get_global_cfg_params(); if (!cfg->use_ltran && cfg->num_process == 1) { - socket_id = numa_node_of_cpu(cfg->cpus[idx]); + numa_id = (cfg->stack_num > 0) ? cfg->numa_id : numa_node_of_cpu(cfg->cpus[idx]); } else { - socket_id = cfg->process_numa[idx]; + numa_id = cfg->process_numa[idx]; } - ret = rte_eth_rx_queue_setup(eth_params->port_id, idx, eth_params->nb_rx_desc, socket_id, + ret = rte_eth_rx_queue_setup(eth_params->port_id, idx, eth_params->nb_rx_desc, numa_id, ð_params->rx_conf, rxtx_mbuf_pool); if (ret < 0) { LSTACK_LOG(ERR, LSTACK, "cannot setup rx_queue %hu: %s\n", idx, rte_strerror(-ret)); return -1; } - ret = rte_eth_tx_queue_setup(eth_params->port_id, idx, eth_params->nb_tx_desc, socket_id, + ret = rte_eth_tx_queue_setup(eth_params->port_id, idx, eth_params->nb_tx_desc, numa_id, ð_params->tx_conf); if (ret < 0) { LSTACK_LOG(ERR, LSTACK, "cannot setup tx_queue %hu: %s\n", idx, rte_strerror(-ret)); @@ -1034,3 +1036,45 @@ void dpdk_nic_features_get(struct gazelle_stack_dfx_data *dfx, uint16_t port_id) dfx->data.nic_features.rx_offload = dev_conf.rxmode.offloads; return; } + +uint32_t dpdk_pktmbuf_mempool_num(void) +{ + struct cfg_params *cfg = get_global_cfg_params(); + + return (MBUFPOOL_RESERVE_NUM + cfg->rxqueue_size + cfg->txqueue_size + + (cfg->tcp_conn_count * cfg->mbuf_count_per_conn) / cfg->num_queue); +} + +uint32_t dpdk_total_socket_memory(void) +{ + uint32_t elt_size = 0; + uint32_t per_pktmbuf_mempool_size = 0; + uint32_t per_rpc_mempool_size = 0; + uint32_t per_conn_ring_size = 0; + /* the actual fixed memory is about 50M, and 100M is reserved here. + * including all hugepages memory used by lwip. + */ + uint32_t fixed_mem = 100; + uint32_t total_socket_memory = 0; + struct cfg_params *cfg = get_global_cfg_params(); + + /* calculate the memory(bytes) of rxtx_mempool */ + elt_size = sizeof(struct rte_mbuf) + MBUF_SZ + RTE_ALIGN(sizeof(struct mbuf_private), RTE_CACHE_LINE_SIZE); + per_pktmbuf_mempool_size = rte_mempool_calc_obj_size(elt_size, 0, NULL); + + /* calculate the memory(bytes) of rpc_mempool, reserved num is (app threads + lstack threads + listen thread) */ + elt_size = sizeof(struct rpc_msg); + per_rpc_mempool_size = rte_mempool_calc_obj_size(elt_size, 0, NULL); + + /* calculate the memory(bytes) of rings, reserved num is GAZELLE_LSTACK_MAX_CONN. */ + per_conn_ring_size = rte_ring_get_memsize(cfg->send_ring_size) + + rte_ring_get_memsize(cfg->recv_ring_size) + + rte_ring_get_memsize(DEFAULT_ACCEPTMBOX_SIZE); + + total_socket_memory = fixed_mem + bytes_to_mb( + (per_pktmbuf_mempool_size * dpdk_pktmbuf_mempool_num()) * cfg->num_queue + + per_rpc_mempool_size * cfg->rpc_msg_max * (RPC_MEMPOOL_THREAD_NUM + cfg->num_queue + 1) + + per_conn_ring_size * GAZELLE_LSTACK_MAX_CONN); + + return total_socket_memory; +} diff --git a/src/lstack/core/lstack_protocol_stack.c b/src/lstack/core/lstack_protocol_stack.c index 553dff3..9cfd54f 100644 --- a/src/lstack/core/lstack_protocol_stack.c +++ b/src/lstack/core/lstack_protocol_stack.c @@ -126,16 +126,9 @@ struct protocol_stack *get_bind_protocol_stack(void) pthread_spin_lock(&stack_group->socket_lock); for (uint16_t i = 0; i < stack_group->stack_num; i++) { struct protocol_stack* stack = stack_group->stacks[i]; - if (get_global_cfg_params()->seperate_send_recv) { - if (stack->is_send_thread && stack->conn_num < min_conn_num) { - index = i; - min_conn_num = stack->conn_num; - } - } else { - if (stack->conn_num < min_conn_num) { - index = i; - min_conn_num = stack->conn_num; - } + if (stack->conn_num < min_conn_num) { + index = i; + min_conn_num = stack->conn_num; } } } @@ -154,16 +147,9 @@ int get_min_conn_stack(struct protocol_stack_group *stack_group) for (int i = 0; i < stack_group->stack_num; i++) { stack = stack_group->stacks[i]; - if (get_global_cfg_params()->seperate_send_recv) { - if (!stack->is_send_thread && stack->conn_num < min_conn_num) { - min_conn_stk_idx = i; - min_conn_num = stack->conn_num; - } - } else { - if (stack->conn_num < min_conn_num) { - min_conn_stk_idx = i; - min_conn_num = stack->conn_num; - } + if (stack->conn_num < min_conn_num) { + min_conn_stk_idx = i; + min_conn_num = stack->conn_num; } } return min_conn_stk_idx; @@ -174,6 +160,10 @@ void bind_to_stack_numa(struct protocol_stack *stack) int32_t ret; pthread_t tid = pthread_self(); + if (get_global_cfg_params()->stack_num > 0) { + return; + } + ret = pthread_setaffinity_np(tid, sizeof(stack->idle_cpuset), &stack->idle_cpuset); if (ret != 0) { LSTACK_LOG(ERR, LSTACK, "thread %d setaffinity to stack %hu failed\n", rte_gettid(), stack->queue_id); @@ -268,18 +258,10 @@ static int32_t create_thread(void *arg, char *thread_name, stack_thread_func fun return -1; } - if (get_global_cfg_params()->seperate_send_recv) { - ret = sprintf_s(name, sizeof(name), "%s", thread_name); - if (ret < 0) { - LSTACK_LOG(ERR, LSTACK, "set name failed\n"); - return -1; - } - } else { - ret = sprintf_s(name, sizeof(name), "%s%02hu", thread_name, t_params->queue_id); - if (ret < 0) { - LSTACK_LOG(ERR, LSTACK, "set name failed\n"); - return -1; - } + ret = sprintf_s(name, sizeof(name), "%s%02hu", thread_name, t_params->queue_id); + if (ret < 0) { + LSTACK_LOG(ERR, LSTACK, "set name failed\n"); + return -1; } ret = pthread_create(&tid, NULL, func, arg); @@ -343,6 +325,7 @@ static int32_t init_stack_value(struct protocol_stack *stack, void *arg) { struct thread_params *t_params = (struct thread_params*) arg; struct protocol_stack_group *stack_group = get_protocol_stack_group(); + struct cfg_params *cfg_params = get_global_cfg_params(); stack->tid = rte_gettid(); stack->queue_id = t_params->queue_id; @@ -363,24 +346,15 @@ static int32_t init_stack_value(struct protocol_stack *stack, void *arg) return -1; } - int idx = t_params->idx; - if (get_global_cfg_params()->seperate_send_recv) { - // 2: idx is even, stack is recv thread, idx is odd, stack is send thread - if (idx % 2 == 0) { - stack->cpu_id = get_global_cfg_params()->recv_cpus[idx / 2]; - stack->is_send_thread = 0; - } else { - stack->cpu_id = get_global_cfg_params()->send_cpus[idx / 2]; - stack->is_send_thread = 1; - } + if (cfg_params->stack_num > 0) { + stack->numa_id = cfg_params->numa_id; } else { - stack->cpu_id = get_global_cfg_params()->cpus[idx]; - } - - stack->socket_id = numa_node_of_cpu(stack->cpu_id); - if (stack->socket_id < 0) { - LSTACK_LOG(ERR, LSTACK, "numa_node_of_cpu failed\n"); - return -1; + stack->cpu_id = cfg_params->cpus[t_params->idx]; + stack->numa_id = numa_node_of_cpu(stack->cpu_id); + if (stack->numa_id < 0) { + LSTACK_LOG(ERR, LSTACK, "numa_node_of_cpu failed\n"); + return -1; + } } if (pktmbuf_pool_init(stack) != 0) { @@ -441,7 +415,10 @@ static struct protocol_stack *stack_thread_init(void *arg) if (thread_affinity_init(stack->cpu_id) != 0) { goto END; } - RTE_PER_LCORE(_lcore_id) = stack->cpu_id; + + if (get_global_cfg_params()->stack_num == 0) { + RTE_PER_LCORE(_lcore_id) = stack->cpu_id; + } lwip_init(); /* Using errno to return lwip_init() result. */ @@ -591,9 +568,7 @@ static void* gazelle_stack_thread(void *arg) static int stack_group_init_mempool(void) { struct cfg_params *cfg_params = get_global_cfg_params(); - uint32_t total_mbufs = 0; - uint32_t total_conn_mbufs = cfg_params->mbuf_count_per_conn * cfg_params->tcp_conn_count; - uint32_t total_nic_mbufs = cfg_params->rxqueue_size + cfg_params->txqueue_size; + uint32_t total_mbufs = dpdk_pktmbuf_mempool_num(); struct rte_mempool *rxtx_mbuf = NULL; uint32_t cpu_id = 0; unsigned numa_id = 0; @@ -603,8 +578,12 @@ static int stack_group_init_mempool(void) "config::num_cpu=%d num_process=%d \n", cfg_params->num_cpu, cfg_params->num_process); for (int cpu_idx = 0; cpu_idx < cfg_params->num_queue; cpu_idx++) { - cpu_id = cfg_params->cpus[cpu_idx]; - numa_id = numa_node_of_cpu(cpu_id); + if (cfg_params->stack_num > 0) { + numa_id = cfg_params->numa_id; + } else { + cpu_id = cfg_params->cpus[cpu_idx]; + numa_id = numa_node_of_cpu(cpu_id); + } for (int process_idx = 0; process_idx < cfg_params->num_process; process_idx++) { queue_id = cpu_idx * cfg_params->num_process + process_idx; @@ -613,11 +592,9 @@ static int stack_group_init_mempool(void) return -1; } - total_mbufs = (total_conn_mbufs / cfg_params->num_queue) + total_nic_mbufs + MBUFPOOL_RESERVE_NUM; rxtx_mbuf = create_pktmbuf_mempool("rxtx_mbuf", total_mbufs, RXTX_CACHE_SZ, queue_id, numa_id); if (rxtx_mbuf == NULL) { - LSTACK_LOG(ERR, LSTACK, "cpuid=%u, numid=%d , rxtx_mbuf idx= %d create_pktmbuf_mempool fail\n", - cpu_id, numa_id, queue_id); + LSTACK_LOG(ERR, LSTACK, "numid=%d, rxtx_mbuf idx=%d, create_pktmbuf_mempool fail\n", numa_id, queue_id); return -1; } @@ -696,23 +673,9 @@ int stack_setup_thread(void) } } for (i = 0; i < queue_num; i++) { - if (get_global_cfg_params()->seperate_send_recv) { - if (i % 2 == 0) { - ret = sprintf_s(name, sizeof(name), "%s_%d_%d", LSTACK_RECV_THREAD_NAME, process_index, i / 2); - if (ret < 0) { - goto OUT1; - } - } else { - ret = sprintf_s(name, sizeof(name), "%s_%d_%d", LSTACK_SEND_THREAD_NAME, process_index, i / 2); - if (ret < 0) { - goto OUT1; - } - } - } else { - ret = sprintf_s(name, sizeof(name), "%s", LSTACK_THREAD_NAME); - if (ret < 0) { - goto OUT1; - } + ret = sprintf_s(name, sizeof(name), "%s", LSTACK_THREAD_NAME); + if (ret < 0) { + goto OUT1; } t_params[i]->idx = i; diff --git a/src/lstack/include/lstack_cfg.h b/src/lstack/include/lstack_cfg.h index 876423d..073aab6 100644 --- a/src/lstack/include/lstack_cfg.h +++ b/src/lstack/include/lstack_cfg.h @@ -20,6 +20,7 @@ #include #include "lstack_protocol_stack.h" +#include "common/gazelle_reg_msg.h" #include "common/gazelle_opt.h" #define BASE_BIN_SCALE 2 @@ -36,7 +37,6 @@ #define ARP_MAX_ENTRIES 1024 #define LOG_DIR_PATH PATH_MAX #define LOG_LEVEL_LEN 16 -#define GAZELLE_MAX_NUMA_NODES 8 #define MAX_PROCESS_NUM 32 /* Default value of low power mode parameters */ @@ -77,6 +77,8 @@ struct cfg_params { uintptr_t base_virtaddr; char file_prefix[PATH_MAX]; } sec_attach_arg; + char socket_mem[SOCKET_MEM_STRLEN]; + char lcores[RTE_MAX_LCORE]; }; struct { // eth @@ -109,6 +111,8 @@ struct cfg_params { struct { // stack uint16_t num_cpu; + uint16_t numa_id; + uint16_t stack_num; uint32_t cpus[CFG_MAX_CPUS]; bool main_thread_affinity; @@ -147,10 +151,6 @@ struct cfg_params { uint32_t process_numa[PROTOCOL_STACK_MAX]; bool tuple_filter; bool use_sockmap; - - bool seperate_send_recv; - uint32_t send_cpus[CFG_MAX_CPUS]; - uint32_t recv_cpus[CFG_MAX_CPUS]; }; }; diff --git a/src/lstack/include/lstack_dpdk.h b/src/lstack/include/lstack_dpdk.h index 0210843..87219c2 100644 --- a/src/lstack/include/lstack_dpdk.h +++ b/src/lstack/include/lstack_dpdk.h @@ -66,5 +66,7 @@ void dpdk_nic_xstats_get(struct gazelle_stack_dfx_data *dfx, uint16_t port_id); void dpdk_nic_features_get(struct gazelle_stack_dfx_data *dfx, uint16_t port_id); bool dpdk_nic_is_xdp(void); +uint32_t dpdk_pktmbuf_mempool_num(void); +uint32_t dpdk_total_socket_memory(void); #endif /* GAZELLE_DPDK_H */ diff --git a/src/lstack/include/lstack_protocol_stack.h b/src/lstack/include/lstack_protocol_stack.h index 4d10ac2..068e9d2 100644 --- a/src/lstack/include/lstack_protocol_stack.h +++ b/src/lstack/include/lstack_protocol_stack.h @@ -40,7 +40,7 @@ struct protocol_stack { uint32_t tid; uint16_t queue_id; uint16_t port_id; - uint16_t socket_id; + uint16_t numa_id; uint16_t cpu_id; uint32_t stack_idx; cpu_set_t idle_cpuset; /* idle cpu in numa of stack, app thread bind to it */ @@ -56,7 +56,6 @@ struct protocol_stack { uint32_t reg_head; volatile bool low_power; - bool is_send_thread; char pad1 __rte_cache_aligned; rpc_queue dfx_rpc_queue; diff --git a/src/lstack/include/lstack_thread_rpc.h b/src/lstack/include/lstack_thread_rpc.h index c284d29..c74981f 100644 --- a/src/lstack/include/lstack_thread_rpc.h +++ b/src/lstack/include/lstack_thread_rpc.h @@ -26,6 +26,8 @@ #define MSG_ARG_4 (4) #define RPM_MSG_ARG_SIZE (5) +#define RPC_MEMPOOL_THREAD_NUM 64 + typedef struct rpc_queue rpc_queue; struct rpc_queue { struct lockless_queue queue; diff --git a/src/lstack/netif/lstack_flow.c b/src/lstack/netif/lstack_flow.c index 1ca3314..ec09e45 100644 --- a/src/lstack/netif/lstack_flow.c +++ b/src/lstack/netif/lstack_flow.c @@ -639,11 +639,7 @@ int distribute_pakages(struct rte_mbuf *mbuf) return TRANSFER_KERNEL; } - if (get_global_cfg_params()->seperate_send_recv) { - queue_id = user_process_idx * each_process_queue_num + (index / 2) * 2; - } else { - queue_id = user_process_idx * each_process_queue_num + index; - } + queue_id = user_process_idx * each_process_queue_num + index; if (queue_id != 0) { if (user_process_idx == 0) { transfer_tcp_to_thread(mbuf, queue_id); -- 2.33.0