500 lines
14 KiB
Diff
500 lines
14 KiB
Diff
From e5b83ac140634830b8f8d9ca8d40a1d9d16d2d5b Mon Sep 17 00:00:00 2001
|
|
From: hejingxian <hejingxian@huawei.com>
|
|
Date: Tue, 12 Nov 2019 15:29:16 +0800
|
|
Subject: [PATCH] feature: introduce affinity hint verify to detect user hint variation
|
|
|
|
In order to make the user affinity hint become effective quickly,
|
|
introduce periodic affinity hint verification.
|
|
---
|
|
Makefile.am | 2 +-
|
|
activate.c | 24 +++++------
|
|
classify.c | 18 ++++++--
|
|
cpumask.h | 7 +++
|
|
hint_verify.c | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
hint_verify.h | 21 +++++++++
|
|
irqbalance.c | 40 +++++++++++------
|
|
irqbalance.h | 4 ++
|
|
placement.c | 14 ++++++
|
|
types.h | 1 +
|
|
10 files changed, 252 insertions(+), 30 deletions(-)
|
|
create mode 100644 hint_verify.c
|
|
create mode 100644 hint_verify.h
|
|
|
|
diff --git a/Makefile.am b/Makefile.am
|
|
index 9276bfb..5fac265 100644
|
|
--- a/Makefile.am
|
|
+++ b/Makefile.am
|
|
@@ -38,7 +38,7 @@ sbin_PROGRAMS += irqbalance-ui
|
|
endif
|
|
|
|
irqbalance_SOURCES = activate.c bitmap.c classify.c cputree.c irqbalance.c \
|
|
- irqlist.c numa.c placement.c procinterrupts.c rules_config.c
|
|
+ irqlist.c numa.c placement.c procinterrupts.c rules_config.c hint_verify.c
|
|
irqbalance_LDADD = $(LIBCAP_NG_LIBS) $(GLIB2_LIBS)
|
|
if IRQBALANCEUI
|
|
irqbalance_ui_SOURCES = $(UI_DIR)/helpers.c $(UI_DIR)/irqbalance-ui.c \
|
|
diff --git a/activate.c b/activate.c
|
|
index d9e1fc3..87336f4 100644
|
|
--- a/activate.c
|
|
+++ b/activate.c
|
|
@@ -88,20 +88,27 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un
|
|
char buf[PATH_MAX];
|
|
FILE *file;
|
|
int ret = 0;
|
|
+ cpumask_t applied_mask;
|
|
|
|
- /*
|
|
- * only activate mappings for irqs that have moved
|
|
- */
|
|
- if (!info->moved)
|
|
- return;
|
|
-
|
|
if (!info->assigned_obj)
|
|
return;
|
|
|
|
+ applied_mask = info->assigned_obj->mask;
|
|
+
|
|
+ if (hint_enabled) {
|
|
+ if (!cpus_empty(info->affinity_hint)) {
|
|
+ cpus_and(applied_mask, applied_mask, info->affinity_hint);
|
|
+ if (!cpus_intersects(applied_mask, unbanned_cpus)) {
|
|
+ log(TO_ALL, LOG_WARNING, "irq %d affinity_hint subset empty\n", info->irq);
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
/*
|
|
* Don't activate anything for which we have an invalid mask
|
|
*/
|
|
- if (check_affinity(info, info->assigned_obj->mask))
|
|
+ if (check_affinity(info, applied_mask))
|
|
return;
|
|
|
|
sprintf(buf, "/proc/irq/%i/smp_affinity", info->irq);
|
|
@@ -120,7 +120,7 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un
|
|
if (!file)
|
|
return;
|
|
|
|
- cpumask_scnprintf(buf, PATH_MAX, info->assigned_obj->mask);
|
|
+ cpumask_scnprintf(buf, PATH_MAX, applied_mask);
|
|
if (ban_pci_assigned_irq) {
|
|
if (!is_still_pci_assigned_irq(info->irq)) {
|
|
ret = fprintf(file, "%s", buf);
|
|
diff --git a/classify.c b/classify.c
|
|
index 5aed9e5..75677f4 100644
|
|
--- a/classify.c
|
|
+++ b/classify.c
|
|
@@ -448,7 +446,7 @@ get_numa_node:
|
|
fd = fopen(path, "r");
|
|
if (!fd) {
|
|
cpus_setall(new->cpumask);
|
|
- goto out;
|
|
+ goto assign_affinity_hint;
|
|
}
|
|
lcpu_mask = NULL;
|
|
ret = getline(&lcpu_mask, &blen, fd);
|
|
@@ -460,6 +458,20 @@ get_numa_node:
|
|
}
|
|
free(lcpu_mask);
|
|
|
|
+assign_affinity_hint:
|
|
+ cpus_clear(new->affinity_hint);
|
|
+ sprintf(path, "/proc/irq/%d/affinity_hint", irq);
|
|
+ fd = fopen(path, "r");
|
|
+ if (!fd)
|
|
+ goto out;
|
|
+ lcpu_mask = NULL;
|
|
+ ret = getline(&lcpu_mask, &blen, fd);
|
|
+ fclose(fd);
|
|
+ if (ret <= 0)
|
|
+ goto out;
|
|
+ cpumask_parse_user(lcpu_mask, ret, new->affinity_hint);
|
|
+ free(lcpu_mask);
|
|
+
|
|
out:
|
|
log(TO_CONSOLE, LOG_INFO, "Adding IRQ %d to database\n", irq);
|
|
return new;
|
|
diff --git a/cpumask.h b/cpumask.h
|
|
index 0774a88..8dd3703 100644
|
|
--- a/cpumask.h
|
|
+++ b/cpumask.h
|
|
@@ -30,6 +30,7 @@
|
|
* void cpus_xor(dst, src1, src2) dst = src1 ^ src2
|
|
* void cpus_andnot(dst, src1, src2) dst = src1 & ~src2
|
|
* void cpus_complement(dst, src) dst = ~src
|
|
+ * void cpumask_copy(dst, src) dst = src
|
|
*
|
|
* int cpus_equal(mask1, mask2) Does mask1 == mask2?
|
|
* int cpus_intersects(mask1, mask2) Do mask1 and mask2 intersect?
|
|
@@ -150,6 +151,12 @@ static inline void __cpus_complement(cpumask_t *dstp,
|
|
bitmap_complement(dstp->bits, srcp->bits, nbits);
|
|
}
|
|
|
|
+#define cpumask_copy(dst, src) __cpumask_copy(&(dst), &(src), NR_CPUS)
|
|
+static inline void __cpumask_copy(cpumask_t *dstp, const cpumask_t *srcp, int nbits)
|
|
+{
|
|
+ bitmap_copy(dstp->bits, srcp->bits, nbits);
|
|
+}
|
|
+
|
|
#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS)
|
|
static inline int __cpus_equal(const cpumask_t *src1p,
|
|
const cpumask_t *src2p, int nbits)
|
|
diff --git a/hint_verify.c b/hint_verify.c
|
|
new file mode 100644
|
|
index 0000000..7a904b0
|
|
--- /dev/null
|
|
+++ b/hint_verify.c
|
|
@@ -0,0 +1,151 @@
|
|
+/*
|
|
+ * Copyright (C) 2019. Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ *
|
|
+ * This program file is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License as published by the
|
|
+ * Free Software Foundation; version 2 of the License.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * for more details.
|
|
+ */
|
|
+#include <stdio.h>
|
|
+#include <stdlib.h>
|
|
+#include "irqbalance.h"
|
|
+
|
|
+extern int keep_going;
|
|
+extern GMainLoop *main_loop;
|
|
+extern gboolean scan();
|
|
+extern int last_interval;
|
|
+
|
|
+int real_sleep_interval;
|
|
+int sleep_interval_count;
|
|
+int poll_hint_interval_count;
|
|
+int sleep_count = 0;
|
|
+gboolean hint_has_changed = FALSE;
|
|
+
|
|
+int hint_changed(void)
|
|
+{
|
|
+ FILE *file;
|
|
+ char *line = NULL;
|
|
+ size_t size = 0;
|
|
+ gboolean changed = FALSE;
|
|
+
|
|
+ file = fopen("/proc/irq/affinity_hint_notify", "r+");
|
|
+ if (!file)
|
|
+ return changed;
|
|
+
|
|
+ if (getline(&line, &size, file) > 0 && *line != '0') {
|
|
+ fprintf(file, "Done");
|
|
+ changed = TRUE;
|
|
+ }
|
|
+
|
|
+ fclose(file);
|
|
+ if (line)
|
|
+ free(line);
|
|
+ return changed;
|
|
+}
|
|
+
|
|
+void update_affinity_hint(struct irq_info *info, void *data __attribute__((unused)))
|
|
+{
|
|
+ FILE *file = NULL;
|
|
+ cpumask_t current_affinity_hint;
|
|
+ char path[PATH_MAX];
|
|
+ char *line = NULL;
|
|
+ size_t size = 0;
|
|
+ ssize_t len;
|
|
+
|
|
+ if (!hint_enabled)
|
|
+ return;
|
|
+
|
|
+ cpus_clear(info->affinity_hint);
|
|
+ sprintf(path, "/proc/irq/%d/affinity_hint", info->irq);
|
|
+ file = fopen(path, "r");
|
|
+ if (!file)
|
|
+ return;
|
|
+
|
|
+ len = getline(&line, &size, file);
|
|
+ fclose(file);
|
|
+
|
|
+ if (len > 0) {
|
|
+ cpumask_parse_user(line, len, current_affinity_hint);
|
|
+ if (!cpus_equal(current_affinity_hint, info->affinity_hint)) {
|
|
+ cpumask_copy(info->affinity_hint, current_affinity_hint);
|
|
+ force_rebalance_irq(info, data);
|
|
+ hint_has_changed = TRUE;
|
|
+ log(TO_ALL, LOG_INFO, "IRQ(%d): affinity hint modified %s\n", info->irq, line);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (line)
|
|
+ free(line);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * This function is the main loop of irqbalance, which includes:
|
|
+ * 1. scan operation for irq balancing;
|
|
+ * 2. polling for irq affinity hint changes so they are applied quickly.
|
|
+ */
|
|
+gboolean poll_hint_affinity_and_scan(gpointer data __attribute__((unused)))
|
|
+{
|
|
+ gboolean need_verify_flag = FALSE;
|
|
+ gboolean need_scan_flag = FALSE;
|
|
+
|
|
+ if (!sleep_interval_count)
|
|
+ sleep_interval_count = 1;
|
|
+ if (!poll_hint_interval_count)
|
|
+ poll_hint_interval_count = 1;
|
|
+
|
|
+ if (sleep_count % sleep_interval_count == 0) {
|
|
+ need_scan_flag = TRUE;
|
|
+ }
|
|
+ if (sleep_count % poll_hint_interval_count == 0) {
|
|
+ need_verify_flag = TRUE;
|
|
+ }
|
|
+ sleep_count++;
|
|
+
|
|
+ if (need_verify_flag && hint_changed()) {
|
|
+ for_each_irq(NULL, update_affinity_hint, NULL);
|
|
+ if (hint_has_changed) {
|
|
+ hint_has_changed = FALSE;
|
|
+ sleep_count = 1;
|
|
+ need_scan_flag = TRUE;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (need_scan_flag) {
|
|
+ if (!scan()) {
|
|
+ g_main_loop_quit(main_loop);
|
|
+ return FALSE;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ update_interval_and_count();
|
|
+ if (last_interval != real_sleep_interval) {
|
|
+ last_interval = real_sleep_interval;
|
|
+ g_timeout_add_seconds(real_sleep_interval, poll_hint_affinity_and_scan, NULL);
|
|
+ return FALSE;
|
|
+ }
|
|
+
|
|
+ if (keep_going) {
|
|
+ return TRUE;
|
|
+ } else {
|
|
+ g_main_loop_quit(main_loop);
|
|
+ return FALSE;
|
|
+ }
|
|
+}
|
|
+
|
|
+void update_interval_and_count()
|
|
+{
|
|
+ real_sleep_interval =
|
|
+ sleep_interval > poll_hint_interval ? poll_hint_interval : sleep_interval;
|
|
+ if (!real_sleep_interval) {
|
|
+ sleep_interval_count = 1;
|
|
+ poll_hint_interval_count = 1;
|
|
+ return;
|
|
+ }
|
|
+ sleep_interval_count = sleep_interval / real_sleep_interval;
|
|
+ poll_hint_interval_count = poll_hint_interval / real_sleep_interval;
|
|
+}
|
|
+
|
|
diff --git a/hint_verify.h b/hint_verify.h
|
|
new file mode 100644
|
|
index 0000000..a309461
|
|
--- /dev/null
|
|
+++ b/hint_verify.h
|
|
@@ -0,0 +1,21 @@
|
|
+/*
|
|
+ * Copyright (C) 2019. Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ *
|
|
+ * This program file is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License as published by the
|
|
+ * Free Software Foundation; version 2 of the License.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * for more details.
|
|
+ */
|
|
+
|
|
+#ifndef _INCLUDE_HINT_VERIFY_H
|
|
+#define _INCLUDE_HINT_VERIFY_H
|
|
+
|
|
+extern int real_sleep_interval;
|
|
+extern gboolean poll_hint_affinity_and_scan();
|
|
+extern void update_interval_and_count();
|
|
+
|
|
+#endif
|
|
diff --git a/irqbalance.c b/irqbalance.c
|
|
index faa8e6a..4a7eb39 100644
|
|
--- a/irqbalance.c
|
|
+++ b/irqbalance.c
|
|
@@ -65,6 +65,8 @@ char *banscript = NULL;
|
|
long HZ;
|
|
int sleep_interval = SLEEP_INTERVAL;
|
|
int last_interval;
|
|
+int hint_enabled = 0;
|
|
+int poll_hint_interval = SLEEP_INTERVAL / 5;
|
|
unsigned long migrate_val = 0;
|
|
unsigned long load_limit = 0;
|
|
GMainLoop *main_loop;
|
|
@@ -99,15 +101,16 @@ struct option lopts[] = {
|
|
{"migrateval", 1, NULL, 'e'},
|
|
{"loadlimit", 1, NULL, 'g'},
|
|
{"rulesconfig", 1, NULL, 'r'},
|
|
+ {"verifyhint", 1, NULL, 'v'},
|
|
{0, 0, 0, 0}
|
|
};
|
|
|
|
static void usage(void)
|
|
{
|
|
- log(TO_CONSOLE, LOG_INFO, "irqbalance [--oneshot | -o] [--debug | -d] [--foreground | -f] [--journal | -j] [--hintpolicy= | -h [exact|subset|ignore]]\n");
|
|
+ log(TO_CONSOLE, LOG_INFO, "irqbalance [--oneshot | -o] [--debug | -d] [--foreground | -f] [--journal | -j] [--hintpolicy | -h <subset>]\n");
|
|
log(TO_CONSOLE, LOG_INFO, " [--powerthresh= | -p <off> | <n>] [--banirq= | -i <n>] [--banmod= | -m <module>] [--policyscript= | -l <script>]\n");
|
|
log(TO_CONSOLE, LOG_INFO, " [--pid= | -s <file>] [--deepestcache= | -c <n>] [--interval= | -t <n>] [--migrateval= | -e <n>] [--loadlimit= | -g <n>]\n");
|
|
- log(TO_CONSOLE, LOG_INFO, " [--rulesconfig= | -r <config>]\n");
|
|
+ log(TO_CONSOLE, LOG_INFO, " [--rulesconfig= | -r <config>] [--verifyhint= | -v n]\n");
|
|
}
|
|
|
|
static void version(void)
|
|
@@ -122,7 +125,7 @@ static void parse_command_line(int argc, char **argv)
|
|
unsigned long val;
|
|
|
|
while ((opt = getopt_long(argc, argv,
|
|
- "odfjVi:p:s:c:b:l:m:t:e:g:r:",
|
|
+ "odfjVi:p:s:c:b:l:m:t:e:g:r:h:v:",
|
|
lopts, &longind)) != -1) {
|
|
|
|
switch(opt) {
|
|
@@ -202,6 +205,22 @@ static void parse_command_line(int argc, char **argv)
|
|
case 'g':
|
|
load_limit = strtoul(optarg, NULL, 10);
|
|
break;
|
|
+ case 'h':
|
|
+ if (!strncmp(optarg, "subset", strlen(optarg)))
|
|
+ hint_enabled = 1;
|
|
+ else {
|
|
+ usage();
|
|
+ exit(1);
|
|
+ }
|
|
+
|
|
+ break;
|
|
+ case 'v':
|
|
+ poll_hint_interval = strtol(optarg, NULL, 10);
|
|
+ if (poll_hint_interval < 1) {
|
|
+ usage();
|
|
+ exit(1);
|
|
+ }
|
|
+ break;
|
|
case 'r':
|
|
rules_config_file = strdup(optarg);
|
|
break;
|
|
@@ -300,7 +319,7 @@ out:
|
|
return 0;
|
|
}
|
|
|
|
-gboolean scan(gpointer data __attribute__((unused)))
|
|
+gboolean scan()
|
|
{
|
|
log(TO_CONSOLE, LOG_INFO, "\n\n\n-----------------------------------------------------------------------------\n");
|
|
clear_work_stats();
|
|
@@ -338,17 +357,9 @@ gboolean scan(gpointer data)
|
|
keep_going = 0;
|
|
cycle_count++;
|
|
|
|
- /* sleep_interval may be changed by socket */
|
|
- if (last_interval != sleep_interval) {
|
|
- last_interval = sleep_interval;
|
|
- g_timeout_add_seconds(sleep_interval, scan, NULL);
|
|
- return FALSE;
|
|
- }
|
|
-
|
|
if (keep_going) {
|
|
return TRUE;
|
|
} else {
|
|
- g_main_loop_quit(main_loop);
|
|
return FALSE;
|
|
}
|
|
}
|
|
@@ -626,9 +638,10 @@ int main(int argc, char** argv)
|
|
goto out;
|
|
}
|
|
log(TO_ALL, LOG_INFO, "irqbalance start scan.\n");
|
|
+ update_interval_and_count();
|
|
main_loop = g_main_loop_new(NULL, FALSE);
|
|
- last_interval = sleep_interval;
|
|
- g_timeout_add_seconds(sleep_interval, scan, NULL);
|
|
+ last_interval = real_sleep_interval;
|
|
+ g_timeout_add_seconds(real_sleep_interval, poll_hint_affinity_and_scan, NULL);
|
|
g_main_loop_run(main_loop);
|
|
|
|
g_main_loop_quit(main_loop);
|
|
diff --git a/irqbalance.h b/irqbalance.h
|
|
index 1befb46..72e141b 100644
|
|
--- a/irqbalance.h
|
|
+++ b/irqbalance.h
|
|
@@ -15,6 +15,7 @@
|
|
#include "types.h"
|
|
#include "config.h"
|
|
#include "rules_config.h"
|
|
+#include "hint_verify.h"
|
|
#ifdef __aarch64__
|
|
#define AARCH64
|
|
#endif
|
|
@@ -120,6 +122,8 @@ extern gint compare_ints(gconstpointer a, gconstpointer b);
|
|
|
|
extern unsigned long migrate_val;
|
|
extern unsigned long load_limit;
|
|
+extern int hint_enabled, poll_hint_interval;
|
|
+extern int sleep_interval;
|
|
/*
|
|
* Generic object functions
|
|
*/
|
|
diff --git a/placement.c b/placement.c
|
|
index 48ac68b..d887c60 100644
|
|
--- a/placement.c
|
|
+++ b/placement.c
|
|
@@ -41,6 +41,7 @@ static void find_best_object(struct topo_obj *d, void *data)
|
|
{
|
|
struct obj_placement *best = (struct obj_placement *)data;
|
|
uint64_t newload;
|
|
+ cpumask_t subset;
|
|
|
|
/*
|
|
* Don't consider the unspecified numa node here
|
|
@@ -58,6 +59,19 @@ static void find_best_object(struct topo_obj *d, void *data)
|
|
if (d->powersave_mode)
|
|
return;
|
|
|
|
+ /*
|
|
+ * If the hint feature is enabled, then we only want
|
|
+ * to consider objects that are within the irqs hint, but
|
|
+ * only if that irq in fact has published a hint
|
|
+ */
|
|
+ if (hint_enabled) {
|
|
+ if (!cpus_empty(best->info->affinity_hint)) {
|
|
+ cpus_and(subset, best->info->affinity_hint, d->mask);
|
|
+ if (cpus_empty(subset))
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+
|
|
newload = d->load;
|
|
if (newload < best->best_cost) {
|
|
best->best = d;
|
|
diff --git a/types.h b/types.h
|
|
index e1f3dc6..c0950ee 100644
|
|
--- a/types.h
|
|
+++ b/types.h
|
|
@@ -67,6 +67,7 @@ struct irq_info {
|
|
int flags;
|
|
struct topo_obj *numa_node;
|
|
cpumask_t cpumask;
|
|
+ cpumask_t affinity_hint;
|
|
uint64_t irq_count;
|
|
uint64_t last_irq_count;
|
|
uint64_t load;
|
|
--
|
|
1.8.3.1
|
|
|
|
|