507 lines
13 KiB
Diff
507 lines
13 KiB
Diff
From 8541a67cdc3cbaf5c6ed04e9a3b3e2c3f584cb33 Mon Sep 17 00:00:00 2001
|
|
From: xiahuang <xiashuang1@huawei.com>
|
|
Date: Thu, 21 Mar 2019 19:08:17 -0400
|
|
Subject: [PATCH 2/2]
|
|
feature-introduce-verifyhint-to-detect-hint-variation
|
|
|
|
Adapted by xiashuang <xiashuang1@huawei.com> when upgrading to 1.4.0.
|
|
---
|
|
activate.c | 24 +++++-----
|
|
classify.c | 18 ++++++--
|
|
cpumask.h | 7 +++
|
|
irqbalance.c | 135 +++++++++++++++++++++++++++++++++++++++++++------------
|
|
irqbalance.h | 5 +++
|
|
placement.c | 14 ++++++
|
|
procinterrupts.c | 57 +++++++++++++++++++++++
|
|
types.h | 1 +
|
|
8 files changed, 217 insertions(+), 44 deletions(-)
|
|
|
|
diff --git a/activate.c b/activate.c
|
|
index 8fd3dd0..1c4b867 100644
|
|
--- a/activate.c
|
|
+++ b/activate.c
|
|
@@ -61,26 +61,26 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un
|
|
char buf[PATH_MAX];
|
|
FILE *file;
|
|
cpumask_t applied_mask;
|
|
- int valid_mask = 0;
|
|
|
|
- /*
|
|
- * only activate mappings for irqs that have moved
|
|
- */
|
|
- if (!info->moved)
|
|
+ if (!info->assigned_obj)
|
|
return;
|
|
|
|
- if (info->assigned_obj) {
|
|
- applied_mask = info->assigned_obj->mask;
|
|
- valid_mask = 1;
|
|
+ applied_mask = info->assigned_obj->mask;
|
|
+
|
|
+ if (hint_enabled) {
|
|
+ if (!cpus_empty(info->affinity_hint)) {
|
|
+ cpus_and(applied_mask, applied_mask, info->affinity_hint);
|
|
+ if (!cpus_intersects(applied_mask, unbanned_cpus)) {
|
|
+ log(TO_ALL, LOG_WARNING, "irq %d affinity_hint subset empty\n", info->irq);
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
}
|
|
|
|
/*
|
|
* Don't activate anything for which we have an invalid mask
|
|
*/
|
|
- if (!valid_mask || check_affinity(info, applied_mask))
|
|
- return;
|
|
-
|
|
- if (!info->assigned_obj)
|
|
+ if (check_affinity(info, applied_mask))
|
|
return;
|
|
|
|
sprintf(buf, "/proc/irq/%i/smp_affinity", info->irq);
|
|
diff --git a/classify.c b/classify.c
|
|
index 30a8d2a..aa391f4 100644
|
|
--- a/classify.c
|
|
+++ b/classify.c
|
|
@@ -66,8 +66,6 @@ struct pci_info {
|
|
#define PCI_SUB_DEVICE_EMC_0568 0x0568
|
|
#define PCI_SUB_DEVICE_EMC_dd00 0xdd00
|
|
|
|
-extern void force_rebalance_irq(struct irq_info *info, void *data __attribute__((unused)));
|
|
-
|
|
/*
|
|
* Apply software workarounds for some special devices
|
|
*
|
|
@@ -414,7 +412,7 @@ get_numa_node:
|
|
fd = fopen(path, "r");
|
|
if (!fd) {
|
|
cpus_setall(new->cpumask);
|
|
- goto out;
|
|
+ goto assign_affinity_hint;
|
|
}
|
|
lcpu_mask = NULL;
|
|
ret = getline(&lcpu_mask, &blen, fd);
|
|
@@ -426,6 +424,20 @@ get_numa_node:
|
|
}
|
|
free(lcpu_mask);
|
|
|
|
+assign_affinity_hint:
|
|
+ cpus_clear(new->affinity_hint);
|
|
+ sprintf(path, "/proc/irq/%d/affinity_hint", irq);
|
|
+ fd = fopen(path, "r");
|
|
+ if (!fd)
|
|
+ goto out;
|
|
+ lcpu_mask = NULL;
|
|
+ ret = getline(&lcpu_mask, &blen, fd);
|
|
+ fclose(fd);
|
|
+ if (ret <= 0)
|
|
+ goto out;
|
|
+ cpumask_parse_user(lcpu_mask, ret, new->affinity_hint);
|
|
+ free(lcpu_mask);
|
|
+
|
|
out:
|
|
log(TO_CONSOLE, LOG_INFO, "Adding IRQ %d to database\n", irq);
|
|
return new;
|
|
diff --git a/cpumask.h b/cpumask.h
|
|
index 0774a88..8dd3703 100644
|
|
--- a/cpumask.h
|
|
+++ b/cpumask.h
|
|
@@ -30,6 +30,7 @@
|
|
* void cpus_xor(dst, src1, src2) dst = src1 ^ src2
|
|
* void cpus_andnot(dst, src1, src2) dst = src1 & ~src2
|
|
* void cpus_complement(dst, src) dst = ~src
|
|
+ * void cpumask_copy(dst, src) dst = src
|
|
*
|
|
* int cpus_equal(mask1, mask2) Does mask1 == mask2?
|
|
* int cpus_intersects(mask1, mask2) Do mask1 and mask2 intersect?
|
|
@@ -150,6 +151,12 @@ static inline void __cpus_complement(cpumask_t *dstp,
|
|
bitmap_complement(dstp->bits, srcp->bits, nbits);
|
|
}
|
|
|
|
+#define cpumask_copy(dst, src) __cpumask_copy(&(dst), &(src), NR_CPUS)
|
|
+static inline void __cpumask_copy(cpumask_t *dstp, const cpumask_t *srcp, int nbits)
|
|
+{
|
|
+ bitmap_copy(dstp->bits, srcp->bits, nbits);
|
|
+}
|
|
+
|
|
#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS)
|
|
static inline int __cpus_equal(const cpumask_t *src1p,
|
|
const cpumask_t *src2p, int nbits)
|
|
diff --git a/irqbalance.c b/irqbalance.c
|
|
index e375a1a..bf9a2e4 100644
|
|
--- a/irqbalance.c
|
|
+++ b/irqbalance.c
|
|
@@ -61,10 +61,38 @@ char *banscript = NULL;
|
|
char *polscript = NULL;
|
|
long HZ;
|
|
int sleep_interval = SLEEP_INTERVAL;
|
|
-GMainLoop *main_loop;
|
|
+int hint_enabled = 0;
|
|
+int poll_hint_interval = SLEEP_INTERVAL / 5;
|
|
+unsigned int next_verify, next_balance;
|
|
|
|
char *banned_cpumask_from_ui = NULL;
|
|
|
|
+static inline void reset_verify_countdown(void)
|
|
+{
|
|
+ next_verify = poll_hint_interval;
|
|
+}
|
|
+
|
|
+static inline void reset_balance_countdown(void)
|
|
+{
|
|
+ next_balance = sleep_interval;
|
|
+}
|
|
+
|
|
+static inline int need_verify(void)
|
|
+{
|
|
+ return !(poll_hint_interval - next_verify);
|
|
+}
|
|
+
|
|
+static inline int need_balance(void)
|
|
+{
|
|
+ return !(sleep_interval - next_balance);
|
|
+}
|
|
+
|
|
+static inline void reset_countdown(void)
|
|
+{
|
|
+ reset_verify_countdown();
|
|
+ reset_balance_countdown();
|
|
+}
|
|
+
|
|
static void sleep_approx(int seconds)
|
|
{
|
|
struct timespec ts;
|
|
@@ -79,6 +107,26 @@ static void sleep_approx(int seconds)
|
|
nanosleep(&ts, NULL);
|
|
}
|
|
|
|
+static void cond_sleep(void)
|
|
+{
|
|
+ unsigned int timeout;
|
|
+
|
|
+ if (next_verify < next_balance) {
|
|
+ timeout = next_verify;
|
|
+ next_balance -= timeout;
|
|
+ reset_verify_countdown();
|
|
+ } else if (next_verify > next_balance) {
|
|
+ timeout = next_balance;
|
|
+ next_verify -= timeout;
|
|
+ reset_balance_countdown();
|
|
+ } else {
|
|
+ timeout = next_verify;
|
|
+ reset_countdown();
|
|
+ }
|
|
+
|
|
+ sleep_approx(timeout);
|
|
+}
|
|
+
|
|
#ifdef HAVE_GETOPT_LONG
|
|
struct option lopts[] = {
|
|
{"oneshot", 0, NULL, 'o'},
|
|
@@ -95,14 +143,15 @@ struct option lopts[] = {
|
|
{"banmod", 1 , NULL, 'm'},
|
|
{"interval", 1 , NULL, 't'},
|
|
{"version", 0, NULL, 'V'},
|
|
+ {"verifyhint", 1, NULL, 'v'},
|
|
{0, 0, 0, 0}
|
|
};
|
|
|
|
static void usage(void)
|
|
{
|
|
- log(TO_CONSOLE, LOG_INFO, "irqbalance [--oneshot | -o] [--debug | -d] [--foreground | -f] [--journal | -j] [--hintpolicy= | -h [exact|subset|ignore]]\n");
|
|
+ log(TO_CONSOLE, LOG_INFO, "irqbalance [--oneshot | -o] [--debug | -d] [--foreground | -f] [--journal | -j] [--hintpolicy | -h <subset>]\n");
|
|
log(TO_CONSOLE, LOG_INFO, " [--powerthresh= | -p <off> | <n>] [--banirq= | -i <n>] [--banmod= | -m <module>] [--policyscript= | -l <script>]\n");
|
|
- log(TO_CONSOLE, LOG_INFO, " [--pid= | -s <file>] [--deepestcache= | -c <n>] [--interval= | -t <n>]\n");
|
|
+ log(TO_CONSOLE, LOG_INFO, " [--pid= | -s <file>] [--deepestcache= | -c <n>] [--interval= | -t <n>] [--verifyhint= | -v n]\n");
|
|
}
|
|
|
|
static void version(void)
|
|
@@ -117,7 +166,7 @@ static void parse_command_line(int argc, char **argv)
|
|
unsigned long val;
|
|
|
|
while ((opt = getopt_long(argc, argv,
|
|
- "odfji:p:s:c:b:l:m:t:V",
|
|
+ "odfji:p:s:c:b:l:m:t:V:h:v:",
|
|
lopts, &longind)) != -1) {
|
|
|
|
switch(opt) {
|
|
@@ -190,12 +239,30 @@ static void parse_command_line(int argc, char **argv)
|
|
journal_logging=1;
|
|
foreground_mode=1;
|
|
break;
|
|
- case 't':
|
|
+ case 't':
|
|
sleep_interval = strtol(optarg, NULL, 10);
|
|
if (sleep_interval < 1) {
|
|
usage();
|
|
exit(1);
|
|
}
|
|
+ reset_balance_countdown();
|
|
+ break;
|
|
+ case 'h':
|
|
+ if (!strncmp(optarg, "subset", strlen(optarg)))
|
|
+ hint_enabled = 1;
|
|
+ else {
|
|
+ usage();
|
|
+ exit(1);
|
|
+ }
|
|
+
|
|
+ break;
|
|
+ case 'v':
|
|
+ poll_hint_interval = strtol(optarg, NULL, 10);
|
|
+ if (poll_hint_interval < 1) {
|
|
+ usage();
|
|
+ exit(1);
|
|
+ }
|
|
+ reset_verify_countdown();
|
|
break;
|
|
}
|
|
}
|
|
@@ -251,21 +318,18 @@ void force_rebalance_irq(struct irq_info *info, void *data __attribute__((unused
|
|
info->assigned_obj = NULL;
|
|
}
|
|
|
|
-gboolean handler(gpointer data __attribute__((unused)))
|
|
+void handler(int signum __attribute__((unused)))
|
|
{
|
|
keep_going = 0;
|
|
- g_main_loop_quit(main_loop);
|
|
- return TRUE;
|
|
}
|
|
|
|
-gboolean force_rescan(gpointer data __attribute__((unused)))
|
|
+void force_rescan(int signum __attribute__((unused)))
|
|
{
|
|
if (cycle_count)
|
|
need_rescan = 1;
|
|
- return TRUE;
|
|
}
|
|
|
|
-gboolean scan(gpointer data)
|
|
+gboolean scan()
|
|
{
|
|
log(TO_CONSOLE, LOG_INFO, "\n\n\n-----------------------------------------------------------------------------\n");
|
|
clear_work_stats();
|
|
@@ -276,7 +340,7 @@ gboolean scan(gpointer data)
|
|
if (need_rescan) {
|
|
need_rescan = 0;
|
|
cycle_count = 0;
|
|
- log(TO_CONSOLE, LOG_INFO, "Rescanning cpu topology \n");
|
|
+ reset_countdown();
|
|
clear_work_stats();
|
|
|
|
free_object_tree();
|
|
@@ -303,16 +367,9 @@ gboolean scan(gpointer data)
|
|
keep_going = 0;
|
|
cycle_count++;
|
|
|
|
- if (data != &sleep_interval) {
|
|
- data = &sleep_interval;
|
|
- g_timeout_add_seconds(sleep_interval, scan, data);
|
|
- return FALSE;
|
|
- }
|
|
-
|
|
if (keep_going) {
|
|
return TRUE;
|
|
} else {
|
|
- g_main_loop_quit(main_loop);
|
|
return FALSE;
|
|
}
|
|
}
|
|
@@ -501,6 +558,7 @@ int init_socket(char *socket_name)
|
|
|
|
int main(int argc, char** argv)
|
|
{
|
|
+ struct sigaction action, hupaction;
|
|
sigset_t sigset, old_sigset;
|
|
|
|
sigemptyset(&sigset);
|
|
@@ -600,11 +658,19 @@ int main(int argc, char** argv)
|
|
}
|
|
}
|
|
|
|
- g_unix_signal_add(SIGINT, handler, NULL);
|
|
- g_unix_signal_add(SIGTERM, handler, NULL);
|
|
- g_unix_signal_add(SIGUSR1, handler, NULL);
|
|
- g_unix_signal_add(SIGUSR2, handler, NULL);
|
|
- g_unix_signal_add(SIGHUP, force_rescan, NULL);
|
|
+ action.sa_handler = handler;
|
|
+ sigemptyset(&action.sa_mask);
|
|
+ action.sa_flags = 0;
|
|
+ sigaction(SIGINT, &action, NULL);
|
|
+ sigaction(SIGTERM, &action, NULL);
|
|
+ sigaction(SIGUSR1, &action, NULL);
|
|
+ sigaction(SIGUSR2, &action, NULL);
|
|
+
|
|
+ hupaction.sa_handler = force_rescan;
|
|
+ sigemptyset(&hupaction.sa_mask);
|
|
+ hupaction.sa_flags = 0;
|
|
+ sigaction(SIGHUP, &hupaction, NULL);
|
|
+
|
|
sigprocmask(SIG_SETMASK, &old_sigset, NULL);
|
|
|
|
#ifdef HAVE_LIBCAP_NG
|
|
@@ -624,12 +690,23 @@ int main(int argc, char** argv)
|
|
if (init_socket()) {
|
|
return EXIT_FAILURE;
|
|
}
|
|
- main_loop = g_main_loop_new(NULL, FALSE);
|
|
- int *last_interval = &sleep_interval;
|
|
- g_timeout_add_seconds(sleep_interval, scan, last_interval);
|
|
- g_main_loop_run(main_loop);
|
|
|
|
- g_main_loop_quit(main_loop);
|
|
+ while (keep_going) {
|
|
+ cond_sleep();
|
|
+
|
|
+ if (need_verify() && hint_changed()) {
|
|
+ for_each_irq(NULL, update_affinity_hint, NULL);
|
|
+ reset_countdown();
|
|
+ }
|
|
+
|
|
+ if (!need_balance()) {
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ if (!scan()) {
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
|
|
free_object_tree();
|
|
free_cl_opts();
|
|
diff --git a/irqbalance.h b/irqbalance.h
|
|
index 73737ed..b8141aa 100644
|
|
--- a/irqbalance.h
|
|
+++ b/irqbalance.h
|
|
@@ -106,12 +106,17 @@ extern void migrate_irq(GList **from, GList **to, struct irq_info *info);
|
|
extern void free_cl_opts(void);
|
|
extern void add_cl_banned_module(char *modname);
|
|
#define irq_numa_node(irq) ((irq)->numa_node)
|
|
+extern void force_rebalance_irq(struct irq_info *info, void *data __attribute__((unused)));
|
|
|
|
/* huawei */
|
|
extern struct irq_info *build_one_dev_entry(const char *dirname, GList *tmp_list);
|
|
extern void find_irq_dev_path(int irq, char *dirname, int length);
|
|
extern struct irq_info *add_new_irq(int irq, struct irq_info *hint, GList *proc_interrupts);
|
|
extern void clear_no_existing_irqs(void);
|
|
+extern void update_affinity_hint(struct irq_info *info, void *data __attribute__((unused)));
|
|
+extern int hint_changed(void);
|
|
+
|
|
+extern int hint_enabled, poll_hint_interval;
|
|
|
|
/*
|
|
* Generic object functions
|
|
diff --git a/placement.c b/placement.c
|
|
index 5a82111..2085e09 100644
|
|
--- a/placement.c
|
|
+++ b/placement.c
|
|
@@ -41,6 +41,7 @@ static void find_best_object(struct topo_obj *d, void *data)
|
|
{
|
|
struct obj_placement *best = (struct obj_placement *)data;
|
|
uint64_t newload;
|
|
+ cpumask_t subset;
|
|
|
|
/*
|
|
* Don't consider the unspecified numa node here
|
|
@@ -59,6 +60,19 @@ static void find_best_object(struct topo_obj *d, void *data)
|
|
if (d->powersave_mode)
|
|
return;
|
|
|
|
+ /*
|
|
+ * If the hint feature is enabled, then we only want
|
|
+ * to consider objects that are within the irq's hint, but
|
|
+ * only if that irq in fact has published a hint
|
|
+ */
|
|
+ if (hint_enabled) {
|
|
+ if (!cpus_empty(best->info->affinity_hint)) {
|
|
+ cpus_and(subset, best->info->affinity_hint, d->mask);
|
|
+ if (cpus_empty(subset))
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+
|
|
newload = d->load;
|
|
if (newload < best->best_cost) {
|
|
best->best = d;
|
|
diff --git a/procinterrupts.c b/procinterrupts.c
|
|
index d384860..b446c55 100644
|
|
--- a/procinterrupts.c
|
|
+++ b/procinterrupts.c
|
|
@@ -544,3 +544,60 @@ void parse_proc_stat(void)
|
|
for_each_object(numa_nodes, compute_irq_branch_load_share, NULL);
|
|
|
|
}
|
|
+
|
|
+int hint_changed(void)
|
|
+{
|
|
+ FILE *file;
|
|
+ char *line = NULL;
|
|
+ size_t size = 0;
|
|
+ int changed = 0;
|
|
+
|
|
+ file = fopen("/proc/irq/affinity_hint_notify", "r+");
|
|
+ if (!file)
|
|
+ return changed;
|
|
+
|
|
+ if (getline(&line, &size, file) > 0 && *line != '0') {
|
|
+ fprintf(file, "Done");
|
|
+ changed = 1;
|
|
+ }
|
|
+
|
|
+ fclose(file);
|
|
+ if (line)
|
|
+ free(line);
|
|
+ return changed;
|
|
+}
|
|
+
|
|
+void update_affinity_hint(struct irq_info *info, void *data __attribute__((unused)))
|
|
+{
|
|
+ FILE *file = NULL;
|
|
+ cpumask_t current_affinity_hint;
|
|
+ char path[PATH_MAX];
|
|
+ char *line = NULL;
|
|
+ size_t size = 0;
|
|
+ ssize_t len;
|
|
+
|
|
+ if (!hint_enabled)
|
|
+ return;
|
|
+
|
|
+ cpus_clear(info->affinity_hint);
|
|
+ sprintf(path, "/proc/irq/%d/affinity_hint", info->irq);
|
|
+ file = fopen(path, "r");
|
|
+ if (!file)
|
|
+ return;
|
|
+
|
|
+ len = getline(&line, &size, file);
|
|
+ fclose(file);
|
|
+
|
|
+ if (len > 0) {
|
|
+ cpumask_parse_user(line, len, current_affinity_hint);
|
|
+ if (!cpus_equal(current_affinity_hint, info->affinity_hint)) {
|
|
+ cpumask_copy(info->affinity_hint, current_affinity_hint);
|
|
+ force_rebalance_irq(info, data);
|
|
+ log(TO_ALL, LOG_INFO, "IRQ(%d): affinity hint modified\n", info->irq);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (line)
|
|
+ free(line);
|
|
+}
|
|
+
|
|
diff --git a/types.h b/types.h
|
|
index 9693cf4..62cc2bb 100644
|
|
--- a/types.h
|
|
+++ b/types.h
|
|
@@ -66,6 +66,7 @@ struct irq_info {
|
|
int flags;
|
|
struct topo_obj *numa_node;
|
|
cpumask_t cpumask;
|
|
+ cpumask_t affinity_hint;
|
|
uint64_t irq_count;
|
|
uint64_t last_irq_count;
|
|
uint64_t load;
|
|
--
|
|
1.8.3.1
|
|
|