From 4b5ead09ab19b08a46a9ffba7cbc5e42e361dcc1 Mon Sep 17 00:00:00 2001 From: yangchen Date: Sat, 23 Nov 2024 18:04:55 +0800 Subject: [PATCH] xdp: support stack bind numa --- src/common/gazelle_opt.h | 5 +- src/lstack/core/lstack_cfg.c | 101 ++++++++---------------- src/lstack/core/lstack_dpdk.c | 20 ----- src/lstack/core/lstack_protocol_stack.c | 82 +++++++++++++++++-- src/lstack/include/lstack_cfg.h | 11 +-- src/lstack/include/lstack_dpdk.h | 1 - 6 files changed, 119 insertions(+), 101 deletions(-) diff --git a/src/common/gazelle_opt.h b/src/common/gazelle_opt.h index 6d787b9..98f1afd 100644 --- a/src/common/gazelle_opt.h +++ b/src/common/gazelle_opt.h @@ -105,8 +105,11 @@ #define LSTACK_SEND_THREAD_NAME "lstack_send" #define LSTACK_RECV_THREAD_NAME "lstack_recv" -#define LSTACK_THREAD_NAME "gazellelstack" +#define LSTACK_THREAD_NAME "gazellelstack" #define SLEEP_US_BEFORE_LINK_UP 10000 +#define CPUS_MAX_NUM 640 +#define GAZELLE_MAX_NUMA_NODES 4 + #endif /* _GAZELLE_OPT_H_ */ diff --git a/src/lstack/core/lstack_cfg.c b/src/lstack/core/lstack_cfg.c index 0ca838d..c8ce5c3 100644 --- a/src/lstack/core/lstack_cfg.c +++ b/src/lstack/core/lstack_cfg.c @@ -43,7 +43,6 @@ #define NUMA_CPULIST_PATH "/sys/devices/system/node/node%u/cpulist" #define DEV_MAC_LEN 17 #define DEV_PCI_ADDR_LEN 12 -#define CPUS_MAX_NUM 256 #define BOND_MIIMON_MIN 1 #define BOND_MIIMON_MAX INT_MAX @@ -127,6 +126,7 @@ static struct config_vector_t g_config_tbl[] = { { "send_ring_size", parse_send_ring_size }, { "recv_ring_size", parse_recv_ring_size }, { "rpc_msg_max", parse_rpc_msg_max }, + { "app_bind_numa", parse_app_bind_numa }, { "stack_num", parse_stack_num }, { "num_cpus", parse_stack_cpu_number }, { "dpdk_args", parse_dpdk_args }, @@ -139,7 +139,6 @@ static struct config_vector_t g_config_tbl[] = { { "low_power_mode", parse_low_power_mode }, { "kni_switch", parse_kni_switch }, { "listen_shadow", parse_listen_shadow }, - { "app_bind_numa", parse_app_bind_numa }, { "app_exclude_cpus", parse_app_exclude_cpus }, { "main_thread_affinity", parse_main_thread_affinity }, { "unix_prefix", parse_unix_prefix }, @@ -446,6 +445,7 @@ static int32_t stack_bind_no_cpu(void) g_config_params.num_cpu = g_config_params.stack_num; g_config_params.num_queue = g_config_params.num_cpu; g_config_params.tot_queue_num = g_config_params.num_queue; + g_config_params.app_bind_numa = true; LSTACK_PRE_LOG(LSTACK_INFO, "NUMA node: %d\n", g_config_params.numa_id); @@ -473,11 +473,11 @@ static int32_t stack_bind_cpus(void) strcpy(g_config_params.lcores, args); tmp_arg = strdup_assert_return(args); - cnt = separate_str_to_array(tmp_arg, g_config_params.cpus, CFG_MAX_CPUS, CFG_MAX_CPUS); + cnt = separate_str_to_array(tmp_arg, g_config_params.cpus, CPUS_MAX_NUM, CPUS_MAX_NUM); free(tmp_arg); if (cnt <= 0) { return stack_bind_no_cpu(); - } else if (cnt > CFG_MAX_CPUS) { + } else if (cnt > CPUS_MAX_NUM) { return -EINVAL; } @@ -534,9 +534,9 @@ static int32_t parse_app_exclude_cpus(void) } tmp_arg = strdup_assert_return(args); - cnt = separate_str_to_array(tmp_arg, g_config_params.app_exclude_cpus, CFG_MAX_CPUS, CFG_MAX_CPUS); + cnt = separate_str_to_array(tmp_arg, g_config_params.app_exclude_cpus, CPUS_MAX_NUM, CPUS_MAX_NUM); free(tmp_arg); - if (cnt <= 0 || cnt > CFG_MAX_CPUS) { + if (cnt <= 0 || cnt > CPUS_MAX_NUM) { return -EINVAL; } @@ -544,18 +544,20 @@ static int32_t parse_app_exclude_cpus(void) return 0; } -static int32_t numa_to_cpusnum(unsigned numa_id, uint32_t *cpulist, int32_t num) +int numa_to_cpusnum(uint16_t numa_id, uint32_t *cpulist, int num) { + int ret; + int fd; char path[PATH_MAX] = {0}; char strbuf[PATH_MAX] = {0}; - int32_t ret = snprintf_s(path, sizeof(path), PATH_MAX - 1, NUMA_CPULIST_PATH, numa_id); + ret = snprintf_s(path, sizeof(path), PATH_MAX - 1, NUMA_CPULIST_PATH, numa_id); if (ret < 0) { LSTACK_LOG(ERR, LSTACK, "snprintf numa_cpulist failed\n"); return -1; } - int32_t fd = open(path, O_RDONLY); + fd = open(path, O_RDONLY); if (fd < 0) { LSTACK_LOG(ERR, LSTACK, "open %s failed\n", path); return -1; @@ -568,55 +570,7 @@ static int32_t numa_to_cpusnum(unsigned numa_id, uint32_t *cpulist, int32_t num) return -1; } - int32_t count = separate_str_to_array(strbuf, cpulist, num, CFG_MAX_CPUS); - return count; -} - -static int32_t stack_idle_cpuset(struct protocol_stack *stack, cpu_set_t *exclude) -{ - uint32_t cpulist[CPUS_MAX_NUM]; - - int32_t cpunum = numa_to_cpusnum(stack->numa_id, cpulist, CPUS_MAX_NUM); - if (cpunum <= 0) { - LSTACK_LOG(ERR, LSTACK, "numa_to_cpusnum failed\n"); - return -1; - } - - CPU_ZERO(&stack->idle_cpuset); - for (int32_t i = 0; i < cpunum; i++) { - /* skip stack cpu */ - if (CPU_ISSET(cpulist[i], exclude)) { - continue; - } - - CPU_SET(cpulist[i], &stack->idle_cpuset); - } - - return 0; -} - -int32_t init_stack_numa_cpuset(struct protocol_stack *stack) -{ - int32_t ret; - struct cfg_params *cfg = get_global_cfg_params(); - - cpu_set_t stack_cpuset; - CPU_ZERO(&stack_cpuset); - for (int32_t idx = 0; idx < cfg->num_cpu; ++idx) { - CPU_SET(cfg->cpus[idx], &stack_cpuset); - } - - for (int32_t idx = 0; idx < cfg->app_exclude_num_cpu; ++idx) { - CPU_SET(cfg->app_exclude_cpus[idx], &stack_cpuset); - } - - ret = stack_idle_cpuset(stack, &stack_cpuset); - if (ret < 0) { - LSTACK_LOG(ERR, LSTACK, "thread_get_cpuset stack(%u) failed\n", stack->tid); - return -1; - } - - return 0; + return separate_str_to_array(strbuf, cpulist, num, CPUS_MAX_NUM); } static int32_t gazelle_parse_base_virtaddr(const char *arg, uintptr_t *base_vaddr) @@ -883,6 +837,28 @@ static bool dpdk_have_socket_mem(int32_t argc, char **argv) return false; } +static void dpdk_fill_lcore(void) +{ + uint16_t lcore_id; + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + if (sched_getaffinity(0, sizeof(cpu_set_t), &cpuset) == -1) { + LSTACK_LOG(ERR, LSTACK, "sched_getaffinity failed\n"); + return; + } + + for (lcore_id = 0; lcore_id < CPU_SETSIZE; lcore_id++) { + if (CPU_ISSET(lcore_id, &cpuset) && + numa_node_of_cpu(lcore_id) == g_config_params.numa_id && + rte_lcore_is_enabled(lcore_id)) { + snprintf_s(g_config_params.lcores, sizeof(g_config_params.lcores), + sizeof(g_config_params.lcores) - 1, "%d", lcore_id); + break; + } + } +} + static void dpdk_fill_socket_mem(void) { uint32_t socket_mem_size = dpdk_total_socket_memory(); @@ -905,17 +881,10 @@ static void dpdk_fill_socket_mem(void) static void dpdk_adjust_args(void) { int idx; - uint16_t lcore_id; if (!dpdk_have_corelist(g_config_params.dpdk_argc, g_config_params.dpdk_argv)) { if (g_config_params.stack_num > 0) { - RTE_LCORE_FOREACH(lcore_id) { - if (numa_node_of_cpu(lcore_id) == g_config_params.numa_id && rte_lcore_is_enabled(lcore_id)) { - snprintf_s(g_config_params.lcores, sizeof(g_config_params.lcores), - sizeof(g_config_params.lcores) - 1, "%d", lcore_id); - break; - } - } + dpdk_fill_lcore(); } g_config_params.dpdk_argv[g_config_params.dpdk_argc++] = strdup_assert_return(OPT_BIND_CORELIST); g_config_params.dpdk_argv[g_config_params.dpdk_argc++] = strdup_assert_return(g_config_params.lcores); diff --git a/src/lstack/core/lstack_dpdk.c b/src/lstack/core/lstack_dpdk.c index 5141c3a..3023a6c 100644 --- a/src/lstack/core/lstack_dpdk.c +++ b/src/lstack/core/lstack_dpdk.c @@ -104,26 +104,6 @@ int32_t thread_affinity_default(void) return 0; } -int32_t thread_affinity_init(int32_t cpu_id) -{ - int32_t ret; - cpu_set_t cpuset; - - if (get_global_cfg_params()->stack_num > 0) { - return 0; - } - - CPU_ZERO(&cpuset); - CPU_SET(cpu_id, &cpuset); - - ret = rte_thread_set_affinity(&cpuset); - if (ret != 0) { - LSTACK_LOG(ERR, LSTACK, "thread %d pthread_setaffinity_np failed ret=%d\n", rte_gettid(), ret); - } - - return 0; -} - int32_t dpdk_eal_init(void) { int32_t ret; diff --git a/src/lstack/core/lstack_protocol_stack.c b/src/lstack/core/lstack_protocol_stack.c index 9cfd54f..2c60a49 100644 --- a/src/lstack/core/lstack_protocol_stack.c +++ b/src/lstack/core/lstack_protocol_stack.c @@ -32,9 +32,8 @@ #include "lstack_epoll.h" #include "lstack_stack_stat.h" #include "lstack_virtio.h" -#include "lstack_protocol_stack.h" - #include "lstack_interrupt.h" +#include "lstack_protocol_stack.h" #if RTE_VERSION < RTE_VERSION_NUM(23, 11, 0, 0) #include @@ -161,6 +160,7 @@ void bind_to_stack_numa(struct protocol_stack *stack) pthread_t tid = pthread_self(); if (get_global_cfg_params()->stack_num > 0) { + numa_run_on_node(stack->numa_id); return; } @@ -187,6 +187,75 @@ void thread_bind_stack(struct protocol_stack *stack) } } +static int stack_affinity_cpu(int cpu_id) +{ + int32_t ret; + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(cpu_id, &cpuset); + + ret = rte_thread_set_affinity(&cpuset); + if (ret != 0) { + LSTACK_LOG(ERR, LSTACK, "thread %d pthread_setaffinity_np failed ret=%d\n", rte_gettid(), ret); + } + + return ret; +} + +static void stack_affinity_numa(int numa_id) +{ + numa_run_on_node(numa_id); +} + +static int32_t stack_idle_cpuset(struct protocol_stack *stack, cpu_set_t *exclude) +{ + int32_t cpunum; + uint32_t cpulist[CPUS_MAX_NUM]; + + cpunum = numa_to_cpusnum(stack->numa_id, cpulist, CPUS_MAX_NUM); + if (cpunum <= 0) { + LSTACK_LOG(ERR, LSTACK, "numa_to_cpusnum failed\n"); + return -1; + } + + CPU_ZERO(&stack->idle_cpuset); + for (uint32_t i = 0; i < cpunum; i++) { + /* skip stack cpu */ + if (CPU_ISSET(cpulist[i], exclude)) { + continue; + } + + CPU_SET(cpulist[i], &stack->idle_cpuset); + } + + return 0; +} + +static int32_t init_stack_numa_cpuset(struct protocol_stack *stack) +{ + int32_t ret; + struct cfg_params *cfg = get_global_cfg_params(); + + cpu_set_t stack_cpuset; + CPU_ZERO(&stack_cpuset); + for (int32_t idx = 0; idx < cfg->num_cpu; ++idx) { + CPU_SET(cfg->cpus[idx], &stack_cpuset); + } + + for (int32_t idx = 0; idx < cfg->app_exclude_num_cpu; ++idx) { + CPU_SET(cfg->app_exclude_cpus[idx], &stack_cpuset); + } + + ret = stack_idle_cpuset(stack, &stack_cpuset); + if (ret < 0) { + LSTACK_LOG(ERR, LSTACK, "thread_get_cpuset stack(%u) failed\n", stack->tid); + return -1; + } + + return 0; +} + static uint32_t get_protocol_traffic(struct protocol_stack *stack) { if (use_ltran()) { @@ -412,12 +481,13 @@ static struct protocol_stack *stack_thread_init(void *arg) goto END; } - if (thread_affinity_init(stack->cpu_id) != 0) { - goto END; - } - if (get_global_cfg_params()->stack_num == 0) { + if (stack_affinity_cpu(stack->cpu_id) != 0) { + goto END; + } RTE_PER_LCORE(_lcore_id) = stack->cpu_id; + } else { + stack_affinity_numa(stack->numa_id); } lwip_init(); diff --git a/src/lstack/include/lstack_cfg.h b/src/lstack/include/lstack_cfg.h index 0fd5323..07a97cb 100644 --- a/src/lstack/include/lstack_cfg.h +++ b/src/lstack/include/lstack_cfg.h @@ -36,7 +36,6 @@ #define RX_RING_NAME 64 #define MBUF_POOL_NAME 64 -#define CFG_MAX_CPUS 512 #define CFG_MAX_PORTS UINT8_MAX #define ARP_MAX_ENTRIES 1024 #define LOG_DIR_PATH PATH_MAX @@ -118,12 +117,12 @@ struct cfg_params { uint16_t num_cpu; uint16_t numa_id; uint16_t stack_num; - uint32_t cpus[CFG_MAX_CPUS]; + uint32_t cpus[CPUS_MAX_NUM]; bool main_thread_affinity; bool app_bind_numa; uint16_t app_exclude_num_cpu; - uint32_t app_exclude_cpus[CFG_MAX_CPUS]; + uint32_t app_exclude_cpus[CPUS_MAX_NUM]; bool stack_mode_rtc; bool listen_shadow; // true:listen in all stack thread. false:listen in one stack thread. @@ -168,10 +167,8 @@ static inline uint8_t use_ltran(void) int cfg_init(void); int gazelle_param_init(int *argc, char **argv); -int gazelle_copy_param(const char *param, bool is_double, - int *argc, char argv[][PATH_MAX]); - +int gazelle_copy_param(const char *param, bool is_double, int *argc, char argv[][PATH_MAX]); int match_host_addr(ip_addr_t *addr); -int32_t init_stack_numa_cpuset(struct protocol_stack *stack); +int numa_to_cpusnum(uint16_t numa_id, uint32_t *cpulist, int num); #endif /* GAZELLE_NET_CFG_H */ diff --git a/src/lstack/include/lstack_dpdk.h b/src/lstack/include/lstack_dpdk.h index 87219c2..c2142d6 100644 --- a/src/lstack/include/lstack_dpdk.h +++ b/src/lstack/include/lstack_dpdk.h @@ -45,7 +45,6 @@ int dpdk_ethdev_start(void); int init_dpdk_ethdev(void); int thread_affinity_default(void); -int thread_affinity_init(int cpu_id); int32_t create_shared_ring(struct protocol_stack *stack); int32_t fill_mbuf_to_ring(struct rte_mempool *mempool, struct rte_ring *ring, uint32_t mbuf_num); -- 2.33.0