irqbalance/feature-introduce-verifyhint-to-detect-hint-variatio.patch
2021-04-09 14:58:29 +08:00

469 lines
14 KiB
Diff

From e5b83ac140634830b8f8d9ca8d40a1d9d16d2d5b Mon Sep 17 00:00:00 2001
From: hejingxian <hejingxian@huawei.com>
Date: Tue, 12 Nov 2019 15:29:16 +0800
Subject: [PATCH] feature: introduce affinity hint verify to detect user hint variation
In order to make the user affinity hint becomes effective quickly,
introduce the periodically affinity hint verify.
---
Makefile.am | 2 +-
activate.c | 14 ++++--
classify.c | 5 ++-
cpumask.h | 7 +++
hint_verify.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++++++
hint_verify.h | 21 ++++++++
irqbalance.c | 43 ++++++++++-----
irqbalance.h | 5 ++
placement.c | 14 ++++++
types.h | 1 +
10 files changed, 228 insertions(+), 20 deletions(-)
create mode 100644 hint_verify.c
create mode 100644 hint_verify.h
diff --git a/Makefile.am b/Makefile.am
index 3086d67..aacb399 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -38,7 +38,7 @@ sbin_PROGRAMS += irqbalance-ui
endif
irqbalance_SOURCES = activate.c bitmap.c classify.c cputree.c irqbalance.c \
- irqlist.c numa.c placement.c procinterrupts.c rules_config.c
+ irqlist.c numa.c placement.c procinterrupts.c rules_config.c hint_verify.c
irqbalance_LDADD = $(LIBCAP_NG_LIBS) $(GLIB2_LIBS)
if IRQBALANCEUI
irqbalance_ui_SOURCES = $(UI_DIR)/helpers.c $(UI_DIR)/irqbalance-ui.c \
diff --git a/activate.c b/activate.c
index 93fde58..c5859bf 100644
--- a/activate.c
+++ b/activate.c
@@ -78,11 +78,6 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un
int ret = 0;
cpumask_t applied_mask;
- /*
- * only activate mappings for irqs that have moved
- */
- if (!info->moved)
- return;
if (!info->assigned_obj)
return;
@@ -90,6 +85,15 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un
/* activate only online cpus, otherwise writing to procfs returns EOVERFLOW */
cpus_and(applied_mask, cpu_online_map, info->assigned_obj->mask);
+ if (hint_enabled) {
+ if (!cpus_empty(info->affinity_hint)) {
+ cpus_and(applied_mask, applied_mask, info->affinity_hint);
+ if (!cpus_intersects(applied_mask, unbanned_cpus)) {
+ return;
+ }
+ }
+ }
+
/*
* Don't activate anything for which we have an invalid mask
*/
diff --git a/classify.c b/classify.c
index 254197e..e9987ee 100644
--- a/classify.c
+++ b/classify.c
@@ -263,7 +263,7 @@ static int get_irq_class(const char *devpath)
return irq_class;
}
-static gint compare_ints(gconstpointer a, gconstpointer b)
+gint compare_ints(gconstpointer a, gconstpointer b)
{
const struct irq_info *ai = a;
const struct irq_info *bi = b;
@@ -397,6 +397,9 @@ get_numa_node:
process_one_line(path, get_mask_from_bitmap, &new->cpumask);
}
+ sprintf(path, "/proc/irq/%d/affinity_hint", irq);
+ process_one_line(path, get_mask_from_bitmap, &new->affinity_hint);
+
log(TO_CONSOLE, LOG_INFO, "Adding IRQ %d to database\n", irq);
return new;
}
diff --git a/cpumask.h b/cpumask.h
index 5bebbeb..bc5f006 100644
--- a/cpumask.h
+++ b/cpumask.h
@@ -30,6 +30,7 @@
* void cpus_xor(dst, src1, src2) dst = src1 ^ src2
* void cpus_andnot(dst, src1, src2) dst = src1 & ~src2
* void cpus_complement(dst, src) dst = ~src
+ * void cpumask_copy(dst, src) dst = src
*
* int cpus_equal(mask1, mask2) Does mask1 == mask2?
* int cpus_intersects(mask1, mask2) Do mask1 and mask2 intersect?
@@ -142,6 +143,12 @@ static inline void __cpus_complement(cpumask_t *dstp,
bitmap_complement(dstp->bits, srcp->bits, nbits);
}
+#define cpumask_copy(dst, src) __cpumask_copy(&(dst), &(src), NR_CPUS)
+static inline void __cpumask_copy(cpumask_t *dstp, const cpumask_t *srcp, int nbits)
+{
+ bitmap_copy(dstp->bits, srcp->bits, nbits);
+}
+
#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS)
static inline int __cpus_equal(const cpumask_t *src1p,
const cpumask_t *src2p, int nbits)
diff --git a/hint_verify.c b/hint_verify.c
new file mode 100644
index 0000000..3492902
--- /dev/null
+++ b/hint_verify.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2019. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * This program file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include "irqbalance.h"
+
+extern int keep_going;
+extern GMainLoop *main_loop;
+extern gboolean scan();
+extern int last_interval;
+
+int real_sleep_interval;
+int sleep_interval_count;
+int poll_hint_interval_count;
+int sleep_count = 0;
+gboolean hint_has_changed = FALSE;
+
+int hint_changed(void)
+{
+ FILE *file;
+ char *line = NULL;
+ size_t size = 0;
+ gboolean changed = FALSE;
+
+ file = fopen("/proc/irq/affinity_hint_notify", "r+");
+ if (!file)
+ return changed;
+
+ if (getline(&line, &size, file) > 0 && *line != '0') {
+ fprintf(file, "Done");
+ changed = TRUE;
+ }
+
+ fclose(file);
+ if (line)
+ free(line);
+ return changed;
+}
+
+void update_affinity_hint(struct irq_info *info, void *data __attribute__((unused)))
+{
+ cpumask_t current_affinity_hint;
+ char path[PATH_MAX];
+
+ if (!hint_enabled)
+ return;
+
+ cpus_clear(info->affinity_hint);
+ sprintf(path, "/proc/irq/%d/affinity_hint", info->irq);
+ process_one_line(path, get_mask_from_bitmap, &current_affinity_hint);
+
+ if (!cpus_equal(current_affinity_hint, info->affinity_hint)) {
+ cpumask_copy(info->affinity_hint, current_affinity_hint);
+ force_rebalance_irq(info, data);
+ hint_has_changed = TRUE;
+ cpumask_scnprintf(path, PATH_MAX, current_affinity_hint);
+ log(TO_ALL, LOG_INFO, "IRQ(%d): affinity hint modified %s\n", info->irq, path);
+ }
+}
+
+/*
+ * This function is the main loop of irqbalance, which include:
+ * 1. scan opration for irq balancing;
+ * 2. poll irq affinity hint changes for quickly applying them.
+ */
+gboolean poll_hint_affinity_and_scan(gpointer data __attribute__((unused)))
+{
+ gboolean need_verify_flag = FALSE;
+ gboolean need_scan_flag = FALSE;
+
+ if (!sleep_interval_count)
+ sleep_interval_count = 1;
+ if (!poll_hint_interval_count)
+ poll_hint_interval_count = 1;
+
+ if (sleep_count % sleep_interval_count == 0) {
+ need_scan_flag = TRUE;
+ }
+ if (sleep_count % poll_hint_interval_count == 0) {
+ need_verify_flag = TRUE;
+ }
+ sleep_count++;
+
+ if (need_verify_flag && hint_changed()) {
+ for_each_irq(NULL, update_affinity_hint, NULL);
+ if (hint_has_changed) {
+ hint_has_changed = FALSE;
+ sleep_count = 1;
+ need_scan_flag = TRUE;
+ }
+ }
+
+ if (need_scan_flag) {
+ if (!scan()) {
+ g_main_loop_quit(main_loop);
+ return FALSE;
+ }
+ }
+
+ update_interval_and_count();
+ if (last_interval != real_sleep_interval) {
+ last_interval = real_sleep_interval;
+ g_timeout_add_seconds(real_sleep_interval, poll_hint_affinity_and_scan, NULL);
+ return FALSE;
+ }
+
+ if (keep_going) {
+ return TRUE;
+ } else {
+ g_main_loop_quit(main_loop);
+ return FALSE;
+ }
+}
+
+void update_interval_and_count()
+{
+ real_sleep_interval =
+ sleep_interval > poll_hint_interval ? poll_hint_interval : sleep_interval;
+ if (!real_sleep_interval) {
+ sleep_interval_count = 1;
+ poll_hint_interval_count = 1;
+ return;
+ }
+ sleep_interval_count = sleep_interval / real_sleep_interval;
+ poll_hint_interval_count = poll_hint_interval / real_sleep_interval;
+}
diff --git a/hint_verify.h b/hint_verify.h
new file mode 100644
index 0000000..7391b32
--- /dev/null
+++ b/hint_verify.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2019. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * This program file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _INCLUDE_HINT_VERIFY_H
+#define _INCLUDE_HINT_VERIFY_H
+
+extern int real_sleep_interval;
+extern gboolean poll_hint_affinity_and_scan();
+extern void update_interval_and_count();
+
+#endif
diff --git a/irqbalance.c b/irqbalance.c
index 450a1ff..5985d8d 100644
--- a/irqbalance.c
+++ b/irqbalance.c
@@ -64,6 +64,8 @@ char *polscript = NULL;
long HZ;
int sleep_interval = SLEEP_INTERVAL;
int last_interval;
+int hint_enabled = 0;
+int poll_hint_interval = SLEEP_INTERVAL / 5;
GMainLoop *main_loop;
char *cpu_ban_string = NULL;
@@ -100,6 +102,8 @@ struct option lopts[] = {
{"version", 0, NULL, 'V'},
{"migrateval", 1, NULL, 'e'},
{"rulesconfig", 1, NULL, 'r'},
+ {"hintpolicy", 1, NULL, 'h'},
+ {"verifyhint", 1, NULL, 'v'},
{0, 0, 0, 0}
};
@@ -108,7 +112,7 @@ static void usage(void)
log(TO_CONSOLE, LOG_INFO, "irqbalance [--oneshot | -o] [--debug | -d] [--foreground | -f] [--journal | -j]\n");
log(TO_CONSOLE, LOG_INFO, " [--powerthresh= | -p <off> | <n>] [--banirq= | -i <n>] [--banmod= | -m <module>] [--policyscript= | -l <script>]\n");
log(TO_CONSOLE, LOG_INFO, " [--pid= | -s <file>] [--deepestcache= | -c <n>] [--interval= | -t <n>] [--migrateval= | -e <n>]\n");
- log(TO_CONSOLE, LOG_INFO, " [--rulesconfig= | -r <config>]\n");
+ log(TO_CONSOLE, LOG_INFO, " [--rulesconfig= | -r <config>] [--hintpolicy | -h <subset>] [--verifyhint= | -v n]\n");
}
static void version(void)
@@ -123,7 +127,7 @@ static void parse_command_line(int argc, char **argv)
unsigned long val;
while ((opt = getopt_long(argc, argv,
- "odfjVi:p:s:c:l:m:t:e:r:",
+ "odfjVi:p:s:c:l:m:t:e:r:h:v:",
lopts, &longind)) != -1) {
switch(opt) {
@@ -198,6 +202,22 @@ static void parse_command_line(int argc, char **argv)
case 'r':
rules_config_file = strdup(optarg);
break;
+ case 'h':
+ if (!strncmp(optarg, "subset", strlen(optarg)))
+ hint_enabled = 1;
+ else {
+ usage();
+ exit(1);
+ }
+
+ break;
+ case 'v':
+ poll_hint_interval = strtol(optarg, NULL, 10);
+ if (poll_hint_interval < 1) {
+ usage();
+ exit(1);
+ }
+ break;
}
}
}
@@ -261,6 +283,10 @@ void force_rebalance_irq(struct irq_info *info, void *data __attribute__((unused)))
if (info->level == BALANCE_NONE)
return;
+ /* Prevent inserting a duplicate entry to avoid list chaos */
+ if (g_list_find_custom(rebalance_irq_list, info, compare_ints))
+ return;
+
if (info->assigned_obj == NULL)
rebalance_irq_list = g_list_append(rebalance_irq_list, info);
else
@@ -275,7 +299,7 @@ static int check_debug()
return ret;
}
-gboolean scan(gpointer data __attribute__((unused)))
+gboolean scan()
{
log(TO_CONSOLE, LOG_INFO, "\n\n\n-----------------------------------------------------------------------------\n");
clear_work_stats();
@@ -325,17 +349,9 @@ out:
keep_going = 0;
cycle_count++;
- /* sleep_interval may be changed by socket */
- if (last_interval != sleep_interval) {
- last_interval = sleep_interval;
- g_timeout_add_seconds(sleep_interval, scan, NULL);
- return FALSE;
- }
-
if (keep_going) {
return TRUE;
} else {
- g_main_loop_quit(main_loop);
return FALSE;
}
}
@@ -720,9 +736,10 @@ int main(int argc, char** argv)
ret = EXIT_FAILURE;
goto out;
}
+ update_interval_and_count();
main_loop = g_main_loop_new(NULL, FALSE);
- last_interval = sleep_interval;
- g_timeout_add_seconds(sleep_interval, scan, NULL);
+ last_interval = real_sleep_interval;
+ g_timeout_add_seconds(real_sleep_interval, poll_hint_affinity_and_scan, NULL);
g_main_loop_run(main_loop);
g_main_loop_quit(main_loop);
diff --git a/irqbalance.h b/irqbalance.h
index 78d0adc..4a73b83 100644
--- a/irqbalance.h
+++ b/irqbalance.h
@@ -15,6 +15,7 @@
#include "types.h"
#include "config.h"
#include "rules_config.h"
+#include "hint_verify.h"
#ifdef __aarch64__
#define AARCH64
@@ -114,6 +115,10 @@ extern void add_banned_irq(int irq);
extern void add_cl_banned_module(char *modname);
extern void add_banned_irq(int irq);
extern void remove_one_irq_from_db(int irq);
+extern void force_rebalance_irq(struct irq_info *info, void *data __attribute__((unused)));
+extern gint compare_ints(gconstpointer a, gconstpointer b);
+extern int hint_enabled, poll_hint_interval;
+extern int sleep_interval;
#define irq_numa_node(irq) ((irq)->numa_node)
diff --git a/placement.c b/placement.c
index bf27297..db79b8b 100644
--- a/placement.c
+++ b/placement.c
@@ -41,6 +41,7 @@ static void find_best_object(struct topo_obj *d, void *data)
{
struct obj_placement *best = (struct obj_placement *)data;
uint64_t newload;
+ cpumask_t subset;
/*
* Don't consider the unspecified numa node here
@@ -58,6 +59,19 @@ static void find_best_object(struct topo_obj *d, void *data)
if (d->powersave_mode)
return;
+ /*
+ * If the hint feature is enabled, then we only want
+ * to consider objects that are within the irqs hint, but
+ * only if that irq in fact has published a hint
+ */
+ if (hint_enabled) {
+ if (!cpus_empty(best->info->affinity_hint)) {
+ cpus_and(subset, best->info->affinity_hint, d->mask);
+ if (cpus_empty(subset))
+ return;
+ }
+ }
+
newload = d->load;
if (newload < best->best_cost) {
best->best = d;
diff --git a/types.h b/types.h
index fa91561..3a04318 100644
--- a/types.h
+++ b/types.h
@@ -67,6 +67,7 @@ struct irq_info {
int flags;
struct topo_obj *numa_node;
cpumask_t cpumask;
+ cpumask_t affinity_hint;
uint64_t irq_count;
uint64_t last_irq_count;
uint64_t load;
--
2.23.0