Revert "Support initializing HBW nodes from memory_locality" patch
The kernel interface that supported initializing high-bandwidth (HBW) nodes from memory_locality has been removed from the operating system kernel. The corresponding memkind support is therefore removed as well, since that memkind interface has to work with the kernel interface.
Signed-off-by: JiangShui Yang <yangjiangshui@h-partners.com>
(cherry picked from commit 597108c78540e6eb3d10866d765929f776d9f1f4)
parent 2e11589c7c
commit 28f96d1a5c
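For context, the reverted change only affected how memkind discovers the set of HBW NUMA nodes at initialization; the allocation interface that applications call is unchanged. A minimal usage sketch with the standard memkind API (illustration only; build with -lmemkind):

#include <memkind.h>
#include <stdio.h>

int main(void)
{
    /* Request memory from the high-bandwidth kind; this can fail (return
     * NULL) when no HBW nodes were detected during initialization. */
    void *buf = memkind_malloc(MEMKIND_HBW, 4096);
    if (!buf) {
        fprintf(stderr, "high-bandwidth memory not available\n");
        return 1;
    }
    memkind_free(MEMKIND_HBW, buf);
    return 0;
}

With the memory_locality support removed, HBW node discovery again relies on HMAT/SLIT or on an explicit MEMKIND_HBW_NODES setting.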
@@ -1,340 +0,0 @@
From 448eb95b45b0cf6ecc7cf1a3e24056a2fdae85bd Mon Sep 17 00:00:00 2001
From: Yicong Yang <yangyicong@hisilicon.com>
Date: Fri, 13 Oct 2023 15:21:11 +0800
Subject: [PATCH] Support initializing HBW nodes from memory_locality

In the current implementation we mainly infer the HBW nodes from
HMAT/SLIT, which may not describe all cases. For example, HMAT/SLIT
cannot describe the topology below:

      [ Node 0 ]
[ CPU 0-3 ][ CPU 4-7 ]
     |          |
 [ HBM 0 ]  [ HBM 1 ]
 [ Node 1 ]  [ Node 2 ]

CPU 0-7 are in one NUMA node, but CPU 0-3 are closest to HBM 0 while
CPU 4-7 are closest to HBM 1. Current HMAT/SLIT cannot support this
case.

In order to support this, openEuler has merged an HBM device driver
that exports the topology through sysfs[1]. The description of the
above topology looks like:
$ cat /sys/kernel/hbm_memory/memory_topo/memory_locality
1 0-3
2 4-7

This patch cooperates with the HBM device driver to support
initializing the HBW nodes from memory_locality for memkind. It first
tries to obtain the HBW nodes by parsing memory_locality; on failure,
or when memory_locality is not present on the system, it falls back to
HMAT/SLIT. The user can also disable this behavior by setting
MEMKIND_DISABLE_MEMORY_LOCALITY=1.

[1] https://gitee.com/openeuler/kernel/pulls/451

Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
---
 include/memkind/internal/memkind_bitmask.h |   2 +
 src/memkind_bitmask.c                      | 185 +++++++++++++++++++++
 src/memkind_hbw.c                          |  42 +++++
 3 files changed, 229 insertions(+)

diff --git a/include/memkind/internal/memkind_bitmask.h b/include/memkind/internal/memkind_bitmask.h
index 5c5b8434..6b0c3f64 100644
--- a/include/memkind/internal/memkind_bitmask.h
+++ b/include/memkind/internal/memkind_bitmask.h
@@ -12,6 +12,8 @@ extern "C" {

 typedef int (*get_node_bitmask)(struct bitmask **);

+int set_numanode_from_memory_locality(void **numanode,
+                                      memkind_node_variant_t node_variant);
 int set_closest_numanode(get_node_bitmask get_bitmask, void **numanode,
                          memkind_node_variant_t node_variant);
 int set_bitmask_for_current_numanode(unsigned long *nodemask,
diff --git a/src/memkind_bitmask.c b/src/memkind_bitmask.c
index 4f6d9f00..84300395 100644
--- a/src/memkind_bitmask.c
+++ b/src/memkind_bitmask.c
@@ -1,9 +1,11 @@
 // SPDX-License-Identifier: BSD-2-Clause
 /* Copyright (C) 2019 - 2021 Intel Corporation. */

+#include <ctype.h>
 #include <errno.h>
 #include <limits.h>
 #include <stdint.h>
+#include <stdio.h>

 #include <memkind/internal/memkind_bitmask.h>
 #include <memkind/internal/memkind_log.h>
@@ -12,6 +14,89 @@
 // Vector of CPUs with memory NUMA Node id(s)
 VEC(vec_cpu_node, int);

+void init_node_closet_cpu(cpu_set_t **cpunode_mask, int num_cpu, int num_nodes)
+{
+    char *line = NULL;
+    size_t len = 0;
+    ssize_t n;
+    FILE *f;
+
+    /*
+     * The content of /sys/kernel/hbm_memory/memory_topo/memory_locality should
+     * look like:
+     * 2 0-3
+     * 3 4-7
+     * 4 8-11
+     * 5 12-15
+     * 6 16-19
+     * 7 20-23
+     * 8 24-27
+     * 9 28-31
+     *
+     * The 1st column is the HBW node number and the 2nd column is the CPU list
+     * which is closest to the HBW node.
+     */
+    f = fopen("/sys/kernel/hbm_memory/memory_topo/memory_locality", "r");
+    if (!f)
+        return;
+
+    while ((n = getline(&line, &len, f)) != -1) {
+        long int node, begin_cpu, end_cpu;
+        char *begin, *end;
+
+        /* Get the node number first */
+        node = strtol(line, &end, 0);
+
+        /* Either the node number is invalid or the whole line is invalid */
+        if (line == end || node == LONG_MAX || node == LONG_MIN)
+            break;
+
+        if (node >= num_nodes) {
+            log_err("Invalid node number provided by memory_locality.");
+            break;
+        }
+
+        /* Try to find the beginning of the CPU list string */
+        while (*end == ' ' && end != line + len)
+            end++;
+
+        if (end == line + len || !isdigit(*end))
+            break;
+
+        begin = end;
+        do {
+            begin_cpu = strtol(begin, &end, 0);
+            if (begin == end || begin_cpu == LONG_MAX || begin_cpu == LONG_MIN)
+                break;
+
+            /* End of the line */
+            if (*end == '\0' || *end == '\n') {
+                CPU_SET_S(begin_cpu, CPU_ALLOC_SIZE(num_cpu), cpunode_mask[node]);
+                break;
+            } else if (*end == ',') {
+                CPU_SET_S(begin_cpu, CPU_ALLOC_SIZE(num_cpu), cpunode_mask[node]);
+            } else if (*end == '-' && isdigit(*(++end))) {
+                begin = end;
+                end_cpu = strtol(begin, &end, 0);
+                if (begin == end || end_cpu == LONG_MAX || end_cpu == LONG_MIN)
+                    break;
+
+                while (begin_cpu <= end_cpu) {
+                    CPU_SET_S(begin_cpu, CPU_ALLOC_SIZE(num_cpu), cpunode_mask[node]);
+                    ++begin_cpu;
+                }
+            } else {
+                break;
+            }
+
+            begin = end + 1;
+        } while (begin < line + len);
+    }
+
+    free(line);
+    fclose(f);
+}
+
 int memkind_env_get_nodemask(char *nodes_env, struct bitmask **bm)
 {
     *bm = numa_parse_nodestring(nodes_env);
@@ -22,6 +107,106 @@ int memkind_env_get_nodemask(char *nodes_env, struct bitmask **bm)
     return MEMKIND_SUCCESS;
 }

+int set_numanode_from_memory_locality(void **numanode,
+                                      memkind_node_variant_t node_variant)
+{
+    int num_cpu = numa_num_configured_cpus();
+    int cpuset_size = CPU_ALLOC_SIZE(num_cpu);
+    int max_node_id = numa_max_node();
+    cpu_set_t **cpunode_mask;
+    int init_node, cpu_id;
+    int status;
+
+    cpunode_mask = calloc(max_node_id + 1, sizeof(*cpunode_mask));
+    if (!cpunode_mask) {
+        status = MEMKIND_ERROR_MALLOC;
+        log_err("calloc() failed.");
+        goto out;
+    }
+
+    for (init_node = 0; init_node <= max_node_id; init_node++) {
+        cpunode_mask[init_node] = CPU_ALLOC(num_cpu);
+        if (!cpunode_mask[init_node]) {
+            while (init_node >= 0) {
+                CPU_FREE(cpunode_mask[init_node]);
+                init_node--;
+            }
+
+            status = MEMKIND_ERROR_MALLOC;
+            log_err("CPU_ALLOC_SIZE() failed.");
+            goto free_cpunode_mask;
+        }
+
+        CPU_ZERO_S(cpuset_size, cpunode_mask[init_node]);
+    }
+
+    init_node_closet_cpu(cpunode_mask, num_cpu, max_node_id + 1);
+
+    struct vec_cpu_node *node_arr =
+        (struct vec_cpu_node *)calloc(num_cpu, sizeof(struct vec_cpu_node));
+    if (!node_arr) {
+        status = MEMKIND_ERROR_MALLOC;
+        log_err("calloc() failed.");
+        goto free_cpunode_mask_array;
+    }
+
+    /* Scan the CPUs once, assuming there are far more CPUs than NUMA nodes */
+    for (cpu_id = 0; cpu_id < num_cpu; cpu_id++) {
+        for (init_node = 0; init_node <= max_node_id; init_node++) {
+            if (CPU_ISSET_S(cpu_id, cpuset_size, cpunode_mask[init_node])) {
+                VEC_PUSH_BACK(&node_arr[cpu_id], init_node);
+
+                /*
+                 * A CPU should always have exactly one closest node; log an
+                 * error if this is violated.
+                 */
+                if (node_variant == NODE_VARIANT_SINGLE &&
+                    VEC_SIZE(&node_arr[cpu_id]) > 1) {
+                    log_err("CPU%d has more than one closet node.", cpu_id);
+                    status = MEMKIND_ERROR_RUNTIME;
+                    for (cpu_id = 0; cpu_id < num_cpu; cpu_id++) {
+                        if (VEC_CAPACITY(&node_arr[cpu_id]))
+                            VEC_DELETE(&node_arr[cpu_id]);
+                    }
+
+                    goto free_node_arr;
+                }
+            }
+        }
+    }
+
+    /* Sanity check each node_arr entry */
+    for (cpu_id = 0; cpu_id < num_cpu; cpu_id++) {
+        if (VEC_SIZE(&node_arr[cpu_id]) == 0) {
+            log_err("CPU%d's nodemask is not initialized.", cpu_id);
+            status = MEMKIND_ERROR_RUNTIME;
+            for (cpu_id = 0; cpu_id < num_cpu; cpu_id++) {
+                if (VEC_CAPACITY(&node_arr[cpu_id]))
+                    VEC_DELETE(&node_arr[cpu_id]);
+            }
+
+            goto free_node_arr;
+        }
+    }
+
+    *numanode = node_arr;
+    status = MEMKIND_SUCCESS;
+    goto free_cpunode_mask_array;
+
+free_node_arr:
+    free(node_arr);
+
+free_cpunode_mask_array:
+    for (init_node = 0; init_node <= max_node_id; init_node++)
+        CPU_FREE(cpunode_mask[init_node]);
+
+free_cpunode_mask:
+    free(cpunode_mask);
+
+out:
+    return status;
+}
+
 int set_closest_numanode(get_node_bitmask get_bitmask, void **numanode,
                          memkind_node_variant_t node_variant)
 {
diff --git a/src/memkind_hbw.c b/src/memkind_hbw.c
index 077660ab..e9948593 100644
--- a/src/memkind_hbw.c
+++ b/src/memkind_hbw.c
@@ -363,10 +363,36 @@ static bool is_hmat_supported(void)
     return true;
 }

+/*
+ * The OS may provide further information about the HBW topology in
+ * /sys/kernel/hbm_memory/memory_topo/memory_locality. Use it unless the
+ * user specified HBW nodes explicitly or disabled use of memory_locality.
+ */
+static bool use_memory_locality(void)
+{
+    char *memory_locality_disable = memkind_get_env("MEMKIND_DISABLE_MEMORY_LOCALITY");
+
+    if (memory_locality_disable && !strncmp(memory_locality_disable, "1", 1))
+        return false;
+
+    if (memkind_get_env("MEMKIND_HBW_NODES"))
+        return false;
+
+    return true;
+}
+
 static void memkind_hbw_closest_numanode_init(void)
 {
     struct hbw_numanode_t *g = &memkind_hbw_numanode_g[NODE_VARIANT_MULTIPLE];
     g->numanode = NULL;
+
+    if (use_memory_locality()) {
+        g->init_err = set_numanode_from_memory_locality(&g->numanode,
+                                                        NODE_VARIANT_MULTIPLE);
+        if (!g->init_err)
+            return;
+    }
+
     if (!is_hmat_supported()) {
         g->init_err = set_closest_numanode(memkind_hbw_get_nodemask,
                                            &g->numanode, NODE_VARIANT_MULTIPLE);
@@ -380,6 +406,14 @@ static void memkind_hbw_closest_preferred_numanode_init(void)
 {
     struct hbw_numanode_t *g = &memkind_hbw_numanode_g[NODE_VARIANT_SINGLE];
     g->numanode = NULL;
+
+    if (use_memory_locality()) {
+        g->init_err = set_numanode_from_memory_locality(&g->numanode,
+                                                        NODE_VARIANT_SINGLE);
+        if (!g->init_err)
+            return;
+    }
+
     if (!is_hmat_supported()) {
         g->init_err = set_closest_numanode(memkind_hbw_get_nodemask,
                                            &g->numanode, NODE_VARIANT_SINGLE);
@@ -393,6 +427,14 @@ static void memkind_hbw_all_numanode_init(void)
 {
     struct hbw_numanode_t *g = &memkind_hbw_numanode_g[NODE_VARIANT_ALL];
     g->numanode = NULL;
+
+    if (use_memory_locality()) {
+        g->init_err = set_numanode_from_memory_locality(&g->numanode,
+                                                        NODE_VARIANT_ALL);
+        if (!g->init_err)
+            return;
+    }
+
     if (!is_hmat_supported()) {
         g->init_err = set_closest_numanode(memkind_hbw_get_nodemask,
                                            &g->numanode, NODE_VARIANT_ALL);
--
2.24.0
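The memory_locality file consumed by the deleted init_node_closet_cpu() above is plain text, one "<hbw-node> <cpu-list>" pair per line. As a rough standalone sketch (illustration only, not memkind code; it assumes the sysfs path and line format described in the reverted patch and skips the validation the memkind version performed):

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
    /* Each line looks like "1 0-3" or "2 4-7,12": an HBW node id followed
     * by the list of CPUs closest to it. */
    FILE *f = fopen("/sys/kernel/hbm_memory/memory_topo/memory_locality", "r");
    char *line = NULL;
    size_t len = 0;

    if (!f) {
        perror("memory_locality not available");
        return 1;
    }

    while (getline(&line, &len, f) != -1) {
        char *cursor;
        long node = strtol(line, &cursor, 10);

        /* Walk the comma-separated CPU list; each item is "N" or "N-M". */
        for (char *tok = strtok(cursor, ", \n"); tok; tok = strtok(NULL, ", \n")) {
            char *dash = strchr(tok, '-');
            long first = strtol(tok, NULL, 10);
            long last = dash ? strtol(dash + 1, NULL, 10) : first;

            for (long cpu = first; cpu <= last; cpu++)
                printf("HBW node %ld is closest to CPU %ld\n", node, cpu);
        }
    }

    free(line);
    fclose(f);
    return 0;
}

The deleted memkind code additionally validated node numbers against numa_max_node() and recorded the result as per-CPU node vectors for the HBW kinds.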
@@ -1,12 +1,11 @@
Name: memkind
Summary: Extensible Heap Manager for User
Version: 1.14.0
Release: 6
Release: 7
License: BSD
URL: http://memkind.github.io/memkind
Source0: https://github.com/memkind/memkind/archive/v1.14.0/%{name}-%{version}.tar.gz
Patch0001: 0001-support-multi-threading-build.patch
Patch0002: 0002-Support-initializing-HBW-nodes-from-memory_locality.patch
Patch0003: 0003-memkind-add-sw64-support.patch

BuildRequires: automake libtool numactl-devel systemd gcc gcc-c++ hwloc-devel
@@ -84,6 +83,9 @@ popd
%{_mandir}/man7/*

%changelog
* Thu Feb 20 2025 JiangShui Yang <yangjiangshui@h-partners.com> - 1.14.0-7
- Revert "Support initializing HBW nodes from memory_locality" patch

* Mon Feb 17 2025 maqi <maqi@uniontech.com> - 1.14.0-6
- add support sw_64