rasdaemon/0006-add-cpu-online-fault-isolation.patch
2022-01-17 14:33:01 +08:00

1015 lines
27 KiB
Diff

From 9e2d3f84c4f158dd58bce4a30eec568331749501 Mon Sep 17 00:00:00 2001
From: Lostwayzxc <luoshengwei@huawei.com>
Date: Tue, 25 May 2021 20:05:49 +0800
Subject: [PATCH] add cpu online fault isolation
Add cpu online fault isolation: when a CE/UCE occurs on a cpu core, the
faulty cpu is taken offline according to a threshold algorithm.
Signed-off-by: Luo Shengwei <luoshengwei@huawei.com>
---
Makefile.am | 6 +-
configure.ac | 11 +
misc/rasdaemon.env | 17 ++
queue.c | 126 +++++++++++
queue.h | 43 ++++
ras-arm-handler.c | 73 +++++++
ras-cpu-isolation.c | 499 ++++++++++++++++++++++++++++++++++++++++++++
ras-cpu-isolation.h | 76 +++++++
ras-events.c | 8 +
ras-record.h | 5 +
10 files changed, 863 insertions(+), 1 deletion(-)
create mode 100644 queue.c
create mode 100644 queue.h
create mode 100644 ras-cpu-isolation.c
create mode 100644 ras-cpu-isolation.h
diff --git a/Makefile.am b/Makefile.am
index fabca78..242ceb7 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -63,13 +63,17 @@ endif
if WITH_AMP_NS_DECODE
rasdaemon_SOURCES += non-standard-ampere.c
endif
+if WITH_CPU_FAULT_ISOLATION
+ rasdaemon_SOURCES += ras-cpu-isolation.c queue.c
+endif
rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a
include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \
ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \
ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \
ras-devlink-handler.h ras-diskerror-handler.h rbtree.h ras-page-isolation.h \
- non-standard-hisilicon.h non-standard-ampere.h ras-memory-failure-handler.h
+ non-standard-hisilicon.h non-standard-ampere.h ras-memory-failure-handler.h \
+ ras-cpu-isolation.h queue.h
# This rule can't be called with more than one Makefile job (like make -j8)
# I can't figure out a way to fix that
diff --git a/configure.ac b/configure.ac
index 33b81fe..d098fcf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -161,6 +161,16 @@ AS_IF([test "x$enable_amp_ns_decode" = "xyes" || test "x$enable_all" == "xyes"],
AM_CONDITIONAL([WITH_AMP_NS_DECODE], [test x$enable_amp_ns_decode = xyes || test x$enable_all == xyes])
AM_COND_IF([WITH_AMP_NS_DECODE], [USE_AMP_NS_DECODE="yes"], [USE_AMP_NS_DECODE="no"])
+AC_ARG_ENABLE([cpu_fault_isolation],
+ AS_HELP_STRING([--enable-cpu-fault-isolation], [enable cpu online fault isolation]))
+
+AS_IF([test "x$enable_cpu_fault_isolation" = "xyes" || test "x$enable_all" == "xyes"], [
+ AC_DEFINE(HAVE_CPU_FAULT_ISOLATION,1,"have cpu online fault isolation")
+ AC_SUBST([WITH_CPU_FAULT_ISOLATION])
+])
+AM_CONDITIONAL([WITH_CPU_FAULT_ISOLATION], [test x$enable_cpu_fault_isolation = xyes || test x$enable_all == xyes])
+AM_COND_IF([WITH_CPU_FAULT_ISOLATION], [USE_CPU_FAULT_ISOLATION="yes"], [USE_CPU_FAULT_ISOLATION="no"])
+
test "$sysconfdir" = '${prefix}/etc' && sysconfdir=/etc
CFLAGS="$CFLAGS -Wall -Wmissing-prototypes -Wstrict-prototypes"
@@ -201,4 +211,5 @@ compile time options summary
Memory Failure : $USE_MEMORY_FAILURE
Memory CE PFA : $USE_MEMORY_CE_PFA
AMP RAS errors : $USE_AMP_NS_DECODE
+ CPU fault isolation : $USE_CPU_FAULT_ISOLATION
EOF
diff --git a/misc/rasdaemon.env b/misc/rasdaemon.env
index 12fd766..3191d03 100644
--- a/misc/rasdaemon.env
+++ b/misc/rasdaemon.env
@@ -27,3 +27,20 @@ PAGE_CE_THRESHOLD="50"
# soft-then-hard First try to soft offline, then try hard offlining.
# Note: default offline choice is "soft".
PAGE_CE_ACTION="soft"
+
+# CPU Online Fault Isolation
+# Whether to enable cpu online fault isolation (yes|no).
+CPU_ISOLATION_ENABLE="no"
+# Specify the threshold of CE numbers.
+#
+# Format:
+# [0-9]+[unit]
+#
+# Supported units:
+# CPU_CE_THRESHOLD: no unit
+# CPU_ISOLATION_CYCLE: D|d (day), H|h (hour), M|m (minute), S|s (second); a value without a unit is in seconds
+CPU_CE_THRESHOLD="18"
+CPU_ISOLATION_CYCLE="24h"
+
+# Maximum number of offline cpus; once it is reached no further cpu is isolated (prevents an avalanche effect)
+CPU_ISOLATION_LIMIT="10"
diff --git a/queue.c b/queue.c
new file mode 100644
index 0000000..92f3d3c
--- /dev/null
+++ b/queue.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2021-2021. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include "queue.h"
+#include "ras-logger.h"
+
+
+/* Report whether @queue holds no nodes; a NULL queue counts as empty. */
+int is_empty(struct link_queue *queue)
+{
+	if (queue == NULL)
+		return 1;
+
+	return queue->size == 0;
+}
+
+/*
+ * Allocate an empty queue (size 0, no head and no tail).
+ * Returns NULL and logs an error when the allocation fails.
+ */
+struct link_queue* init_queue(void)
+{
+	struct link_queue *fresh = malloc(sizeof(*fresh));
+
+	if (!fresh) {
+		log(TERM, LOG_ERR, "Failed to allocate memory for queue.\n");
+		return NULL;
+	}
+	fresh->head = fresh->tail = NULL;
+	fresh->size = 0;
+	return fresh;
+}
+
+/*
+ * Free every node held by @queue and reset it to the empty state.
+ * The queue object itself stays allocated; a NULL @queue is ignored.
+ */
+void clear_queue(struct link_queue *queue)
+{
+	struct queue_node *cur, *following;
+
+	if (!queue)
+		return;
+
+	for (cur = queue->head; cur; cur = following) {
+		/* grab the successor before the node is released */
+		following = cur->next;
+		free(cur);
+	}
+	queue->size = 0;
+	queue->head = queue->tail = NULL;
+}
+
+/* Release all nodes of @queue and then the queue itself (NULL is a no-op). */
+void free_queue(struct link_queue *queue)
+{
+	clear_queue(queue);
+	/* free(NULL) is defined to do nothing, so no guard is required */
+	free(queue);
+}
+
+/* Append @node at the tail of @queue; both must be non-NULL (not checked). */
+void push(struct link_queue *queue, struct queue_node *node)
+{
+	if (queue->head != NULL) {
+		/* non-empty queue: link the node behind the current tail */
+		node->next = queue->tail->next;
+		queue->tail->next = node;
+	} else {
+		/* first element becomes the head as well */
+		queue->head = node;
+	}
+
+	queue->tail = node;
+	queue->size += 1;
+}
+
+/* Free the head node; returns 0, or -1 when @queue is NULL or empty. */
+int pop(struct link_queue *queue)
+{
+	if (queue == NULL || is_empty(queue))
+		return -1;
+
+	struct queue_node *old_head = queue->head;
+	queue->head = old_head->next;
+	if (queue->head == NULL)
+		queue->tail = NULL;	/* never leave tail pointing at freed memory */
+	free(old_head);
+	queue->size--;
+	return 0;
+}
+
+/*
+ * Peek at the head node without removing it.
+ * Returns NULL when @queue is NULL or has no head node.
+ */
+struct queue_node* front(struct link_queue *queue)
+{
+	return queue ? queue->head : NULL;
+}
+
+/* Allocate an unlinked node holding @time and @value; NULL if malloc fails. */
+struct queue_node* node_create(time_t time, unsigned value)
+{
+	struct queue_node *fresh = malloc(sizeof(*fresh));
+
+	if (fresh == NULL)
+		return NULL;
+	fresh->next = NULL;
+	fresh->value = value;
+	fresh->time = time;
+
+	return fresh;
+}
diff --git a/queue.h b/queue.h
new file mode 100644
index 0000000..9684c58
--- /dev/null
+++ b/queue.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2021-2021. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+*/
+
+#ifndef __RAS_QUEUE_H
+#define __RAS_QUEUE_H
+
+#include <time.h>	/* time_t: keeps this header usable on its own */
+
+struct queue_node {		/* one timestamped entry of a link_queue */
+	time_t time;
+	unsigned value;
+	struct queue_node *next;	/* following node, NULL at the tail */
+};
+
+struct link_queue	/* singly linked FIFO of queue_node entries */
+{
+ struct queue_node *head;	/* node returned by front() and freed by pop() */
+ struct queue_node *tail;	/* node most recently added by push() */
+ int size;	/* node count: bumped by push(), dropped by pop(), zeroed by clear_queue() */
+};
+
+int is_empty(struct link_queue *queue);	/* 1 for a NULL or empty queue, else 0 */
+struct link_queue* init_queue(void);	/* new empty queue, NULL if malloc fails */
+void clear_queue(struct link_queue *queue);	/* free all nodes, keep the queue object */
+void free_queue(struct link_queue *queue);	/* free all nodes and the queue object */
+void push(struct link_queue *queue, struct queue_node *node);	/* append; arguments are not NULL-checked */
+int pop(struct link_queue *queue);	/* free the head node; 0 on success, -1 if NULL/empty */
+struct queue_node* front(struct link_queue *queue);	/* head node, or NULL for a NULL queue */
+struct queue_node* node_create(time_t time, unsigned value);	/* malloc'd unlinked node, NULL on failure */
+
+
+#endif
\ No newline at end of file
diff --git a/ras-arm-handler.c b/ras-arm-handler.c
index 1149dc6..a64f20b 100644
--- a/ras-arm-handler.c
+++ b/ras-arm-handler.c
@@ -22,6 +22,44 @@
#include "ras-report.h"
#include "ras-non-standard-handler.h"
#include "non-standard-ampere.h"
+#include "ras-cpu-isolation.h"
+
+#ifdef HAVE_CPU_FAULT_ISOLATION
+static int is_core_failure(unsigned long value)
+{
+	/*
+	 * A Flags value counts as a core failure when at least one of
+	 * bits 0, 1 or 3 is set while bit 2 is clear.
+	 */
+	const unsigned long bit2 = 0x1 << 2;
+	return !(value & bit2) && (value & 0xf);
+}
+
+/*
+ * Count the entries in @data whose Flags byte reports a core failure.
+ * According to UEFI_2_9_2021_03_18 specification chapter N2.4.4, each
+ * processor error information structure is 32 bytes long and holds its
+ * one-byte Flags field at byte offset 7.
+ */
+static int count_errors(struct event_format *event, const uint8_t *data, int len)
+{
+	const int flags_offset = 7;
+	int failures = 0;
+
+	if (len % PEI_ERR_SIZE) {
+		log(TERM, LOG_ERR, "the event data does not match to the ARM Processor Error Information Structure\n");
+		return failures;
+	}
+	for (int pos = flags_offset; pos < len; pos += PEI_ERR_SIZE) {
+		unsigned long flags = pevent_read_number(event->pevent, data+pos, FLAGS_SIZE);
+		if (!is_core_failure(flags))
+			continue;
+		failures++;
+		log(TERM, LOG_INFO, "Error in cpu core catched\n");
+	}
+	return failures;
+}
+#endif
void display_raw_data(struct trace_seq *s,
const uint8_t *buf,
@@ -139,6 +177,41 @@ int ras_arm_event_handler(struct trace_seq *s,
display_raw_data(s, ev.vsei_error, ev.oem_len);
#endif
+#ifdef HAVE_CPU_FAULT_ISOLATION
+ /* record cpu error */
+ if (pevent_get_field_val(s, event, "sev", record, &val, 1) < 0)
+ return -1;
+ /* refer to UEFI_2_9_2021_03_18 specification chapter N2.2 Table N-5 */
+ switch (val) {
+ case GHES_SEV_NO:
+ ev.severity = "Informational";
+ break;
+ case GHES_SEV_CORRECTED:
+ ev.severity = "Corrected";
+ break;
+ case GHES_SEV_RECOVERABLE:
+ ev.severity = "Recoverable";
+ break;
+ default:
+ case GHES_SEV_PANIC:
+ ev.severity = "Fatal";
+ }
+
+ if (val == GHES_SEV_CORRECTED || val == GHES_SEV_RECOVERABLE) {
+ int len, nums;
+ ev.error_info = pevent_get_field_raw(s, event, "buf", record, &len, 1);
+ if (!ev.error_info)
+ return -1;
+ ev.length = len;
+ /* relate to enum error_type */
+ nums = count_errors(event, ev.error_info, len);
+ if (nums > 0) {
+ struct error_info err_info = {nums, now, val};
+ ras_record_cpu_error(&err_info, ev.mpidr);
+ }
+ }
+#endif
+
/* Insert data into the SGBD */
#ifdef HAVE_SQLITE3
ras_store_arm_record(ras, &ev);
diff --git a/ras-cpu-isolation.c b/ras-cpu-isolation.c
new file mode 100644
index 0000000..6dcff70
--- /dev/null
+++ b/ras-cpu-isolation.c
@@ -0,0 +1,499 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2021-2021. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+*/
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+#include "ras-logger.h"
+#include "ras-cpu-isolation.h"
+
+static struct cpu_info *cpu_infos = NULL;	/* per-cpu records, allocated by init_cpu_info() */
+static unsigned int ncores, cores_per_socket, cores_per_die;	/* set by init_cpu_info(), get_sockets(), get_dies() */
+static unsigned int sockets, dies = 1;	/* counted by get_sockets() / get_dies() */
+static unsigned int enabled = 1;	/* cleared by ras_error_count_init() when isolation is off */
+static const char *cpu_path_format = "/sys/devices/system/cpu/cpu%d/online";	/* read and written to query/offline a cpu */
+static const char *core_siblings_list_path = "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list";	/* read by get_sockets() */
+static const char *node_path = "/sys/devices/system/node/possible";	/* read by get_dies() */
+
+static const struct param normal_units[] = {	/* unit table for plain numbers: no unit letter can match "" */
+ { "", 1 },
+ {}	/* terminator: parse_ul_config() stops at a NULL name */
+};
+
+static const struct param cycle_units[] = {	/* unit letter -> multiplier, i.e. seconds per unit */
+ { "d", 24 * 60 * 60 },
+ { "h", 60 * 60 },
+ { "m", 60 },
+ { "s", 1 },
+ {}	/* terminator: parse_ul_config() stops at a NULL name */
+};
+
+static struct isolation_param threshold = {	/* CE count at which do_ce_handler() tries to offline a cpu */
+ .name = "CPU_CE_THRESHOLD",
+ .units = normal_units,
+ .value = 18,	/* default, kept when the environment value is missing or invalid */
+ .limit = 10000
+};
+
+static struct isolation_param cpu_limit = {	/* .value and .limit are filled in by init_cpu_info() */
+ .name = "CPU_ISOLATION_LIMIT",
+ .units = normal_units
+};
+
+static struct isolation_param cycle = {	/* time span used by do_ce_handler() to expire old CE samples */
+ .name = "CPU_ISOLATION_CYCLE",
+ .units = cycle_units,
+ .value = 24 * 60 * 60,	/* default: 24 hours expressed in seconds */
+ .limit = 30 * 24 * 60 * 60	/* upper bound: 30 days expressed in seconds */
+};
+
+static const char *cpu_state[] = {	/* printable names indexed by enum cpu_state, used in log messages */
+ [CPU_OFFLINE] = "offline",
+ [CPU_ONLINE] = "online",
+ [CPU_OFFLINE_FAILED] = "offline-failed",
+ [CPU_UNKNOWN] = "unknown"
+};
+
+/* Open the sysfs file that @format names for @cpu; returns fd or -1 (logged). */
+static int open_sys_file(unsigned cpu, int oflag, const char *format)
+{
+	char path[MAX_PATH_LEN] = "";
+	int fd;
+
+	snprintf(path, sizeof(path), format, cpu);
+	fd = open(path, oflag);
+	if (fd == -1) {
+		log(TERM, LOG_ERR, "[%s]:open file: %s failed\n", __func__, path);
+	}
+
+	return fd;
+}
+
+/*
+ * Count sockets as the number of distinct core_siblings_list strings found
+ * across all cpus and derive cores_per_socket; -1 only if malloc fails.
+ */
+static int get_sockets(void)
+{
+	unsigned int j;
+	char buf[MAX_BUF_LEN] = "";
+	struct cpu_set *cpu_sets = malloc(sizeof(*cpu_sets) * ncores);
+
+	cores_per_socket = ncores;
+	if (!cpu_sets) {
+		log(TERM, LOG_ERR, "Failed to allocate memory for cpu sets in %s.\n", __func__);
+		return -1;
+	}
+
+	for (unsigned int i = 0; i < ncores; ++i) {
+		int fd = open_sys_file(i, O_RDONLY, core_siblings_list_path);
+		if (fd == -1)
+			continue;
+		/* leave room for the terminator: read() does not add one */
+		ssize_t nread = read(fd, buf, sizeof(buf) - 1);
+		close(fd);
+		if (nread <= 0)
+			continue;
+		buf[nread] = '\0';
+		for (j = 0; j < sockets; ++j) {
+			if (strcmp(cpu_sets[j].buf, buf) == 0)
+				break;
+		}
+		if (j == sockets) {
+			strcpy(cpu_sets[sockets].buf, buf);
+			sockets++;
+		}
+	}
+	free(cpu_sets);
+	cores_per_socket = sockets > 0 ? ncores / sockets : ncores;
+	return 0;
+}
+
+/*
+ * Derive the die count from the "first-last" range in node_path and set
+ * cores_per_die accordingly. Returns -1 when the file cannot be opened.
+ */
+static int get_dies(void)
+{
+	int fd, begin, end;
+	ssize_t nread;
+	char buf[20] = "";
+
+	cores_per_die = ncores;
+	fd = open(node_path, O_RDONLY);
+	if (fd == -1)
+		return -1;
+	/* the zeroed last byte is never overwritten, so buf stays terminated */
+	nread = read(fd, buf, sizeof(buf) - 1);
+	close(fd);
+	if (nread > 0 && sscanf(buf, "%d-%d", &begin, &end) == 2)
+		dies = end > begin ? end - begin + 1 : 1;
+	cores_per_die = ncores / dies;
+	return 0;
+}
+
+/* Read the one-byte sysfs online state of @cpu; CPU_UNKNOWN if unreadable. */
+static int get_cpu_status(unsigned cpu)
+{
+	char digit[2] = "";
+	int state = CPU_UNKNOWN;
+	int fd = open_sys_file(cpu, O_RDONLY, cpu_path_format);
+
+	if (fd == -1)
+		return CPU_UNKNOWN;
+
+	if (read(fd, digit, 1) <= 0 || sscanf(digit, "%d", &state) != 1)
+		state = CPU_UNKNOWN;
+	close(fd);
+
+	if (state < 0 || state > CPU_UNKNOWN)
+		return CPU_UNKNOWN;
+	return state;
+}
+
+/* Set up per-cpu state for @cpus cores and the topology; 0 or -1 on failure. */
+static int init_cpu_info(unsigned cpus)
+{
+	ncores = cpus;
+	/* zeroed memory: counters start at 0 and unset ce_queue slots stay NULL */
+	cpu_infos = calloc(cpus, sizeof(*cpu_infos));
+	if (!cpu_infos) {
+		log(TERM, LOG_ERR, "Failed to allocate memory for cpu infos in %s.\n", __func__);
+		return -1;
+	}
+
+	for (unsigned int i = 0; i < cpus; ++i) {
+		cpu_infos[i].state = get_cpu_status(i);
+		cpu_infos[i].ce_queue = init_queue();
+		if (cpu_infos[i].ce_queue == NULL) {
+			log(TERM, LOG_ERR, "Failed to allocate memory for cpu ce queue in %s.\n", __func__);
+			return -1;
+		}
+	}
+	/* the offlined-cpu limit is capped at all-but-one cpu and defaults to 0 */
+	cpu_limit.limit = cpus - 1;
+	cpu_limit.value = 0;
+
+	if (get_sockets() < 0 || get_dies() < 0) {
+		log(TERM, LOG_ERR, "Failed to get sockets or nodes of the system\n");
+		return -1;
+	}
+	return 0;
+}
+
+/* Clamp config->value to config->limit and warn when that was necessary. */
+static void check_config(struct isolation_param *config)
+{
+	if (config->value <= config->limit)
+		return;
+	log(TERM, LOG_WARNING, "Value: %lu exceed limit: %lu, set to limit\n", config->value, config->limit);
+	config->value = config->limit;
+}
+
+/*
+ * Parse @env as "<digits>[unit letter]" into *@value, which must be 0 on
+ * entry; a unit letter scales the number by its entry in config->units.
+ * Returns 0 on success, -1 on empty, malformed or out-of-range input.
+ */
+static int parse_ul_config(struct isolation_param *config, char *env, unsigned long *value)
+{
+	int env_size, has_unit = 0;
+	char *unit;
+
+	if (!env || strlen(env) == 0)
+		return -1;
+	env_size = strlen(env);
+	unit = env + env_size - 1;
+
+	/* <ctype.h> functions take an unsigned char value, not a plain char */
+	if (isalpha((unsigned char)*unit)) {
+		has_unit = 1;
+		if (--env_size <= 0)
+			return -1;
+	}
+
+	for (int i = 0; i < env_size; ++i) {
+		if (!isdigit((unsigned char)env[i]))
+			return -1;
+		unsigned long digit = env[i] - '0';
+		/* refuse the digit if 10 * value + digit would exceed ULONG_MAX */
+		if (*value > ULONG_MAX / 10 || (*value == ULONG_MAX / 10 && digit > ULONG_MAX % 10)) {
+			log(TERM, LOG_ERR, "%s is out of range: %lu\n", env, ULONG_MAX);
+			return -1;
+		}
+		*value = 10 * (*value) + digit;
+	}
+
+	if (!has_unit)
+		return 0;
+
+	for (const struct param *units = config->units; units->name; units++) {
+		if (strcasecmp(unit, units->name))
+			continue;
+		if (*value > (ULONG_MAX / units->value)) {
+			log(TERM, LOG_ERR, "%s is out of range: %lu\n", env, ULONG_MAX);
+			return -1;
+		}
+		*value = (*value) * units->value;
+		return 0;
+	}
+
+	log(TERM, LOG_ERR, "Invalid unit %s\n", unit);
+	return -1;
+}
+
+/* Load config->value from the environment variable config->name, keeping the default on bad input. */
+static void init_config(struct isolation_param *config)
+{
+	char *env = getenv(config->name);
+	unsigned long value = 0;
+
+	if (parse_ul_config(config, env, &value) < 0) {
+		/* env is NULL when the variable is unset; NULL must not reach %s */
+		log(TERM, LOG_ERR, "Invalid %s: %s! Use default value %lu.\n", config->name, env ? env : "(null)", config->value);
+		return;
+	}
+	config->value = value;
+	check_config(config);
+}
+
+/*
+ * Tell whether the feature was switched on through the environment.
+ * Returns 0 only when CPU_ISOLATION_ENABLE is "yes" (any case), else -1.
+ */
+static int check_config_status(void)
+{
+	const char *flag = getenv("CPU_ISOLATION_ENABLE");
+
+	return (flag != NULL && strcasecmp(flag, "yes") == 0) ? 0 : -1;
+}
+
+/* Build the per-cpu state for @cpus cores and, if enabled, load the tunables. */
+void ras_error_count_init(unsigned cpus)
+{
+	if (init_cpu_info(cpus) >= 0 && check_config_status() >= 0) {
+		log(TERM, LOG_INFO, "Cpu fault isolation is enabled\n");
+		init_config(&threshold);
+		init_config(&cpu_limit);
+		init_config(&cycle);
+		return;
+	}
+	enabled = 0;
+	log(TERM, LOG_WARNING, "Cpu fault isolation is disabled\n");
+}
+
+/* Free every per-cpu CE queue and the table; safe to call more than once. */
+void cpu_infos_free(void)
+{
+	for (unsigned int i = 0; cpu_infos && i < ncores; ++i)
+		free_queue(cpu_infos[i].ce_queue);
+
+	free(cpu_infos);
+	cpu_infos = NULL;	/* drop the dangling pointer so a second call is a no-op */
+}
+
+/*
+ * Try to take @cpu offline by writing "0" to its sysfs online file and
+ * then re-reading the state. Returns HANDLE_SUCCEED only if the cpu
+ * reads back as offline, HANDLE_FAILED in every other case.
+ */
+static int do_cpu_offline(unsigned cpu)
+{
+	static const char zero[] = "0";
+	int fd, rc;
+
+	/* pessimistic default, replaced once the real state is read back */
+	cpu_infos[cpu].state = CPU_OFFLINE_FAILED;
+	fd = open_sys_file(cpu, O_RDWR, cpu_path_format);
+	if (fd == -1)
+		return HANDLE_FAILED;
+
+	rc = write(fd, zero, sizeof(zero) - 1);
+	if (rc < 0) {
+		log(TERM, LOG_ERR, "cpu%d offline failed, errno:%d\n", cpu, errno);
+		close(fd);
+		return HANDLE_FAILED;
+	}
+	close(fd);
+
+	/* check whether the cpu is isolated successfully */
+	cpu_infos[cpu].state = get_cpu_status(cpu);
+	if (cpu_infos[cpu].state != CPU_OFFLINE)
+		return HANDLE_FAILED;
+	return HANDLE_SUCCEED;
+}
+
+/*
+ * Drop queued CE samples while the span between the oldest and the newest
+ * one exceeds cycle.value, subtracting their counts from ce_nums, then try
+ * to offline @cpu when the remaining count has reached threshold.value.
+ * Returns HANDLE_NOTHING below the threshold, else do_cpu_offline()'s result.
+ */
+static int do_ce_handler(unsigned cpu)
+{
+	struct cpu_info *info = &cpu_infos[cpu];
+	struct link_queue *window = info->ce_queue;
+
+	while (window->head && window->tail &&
+	       window->tail->time - window->head->time > cycle.value) {
+		unsigned expired = window->head->value;
+		if (pop(window) == 0)
+			info->ce_nums -= expired;
+	}
+
+	if (info->ce_nums < threshold.value)
+		return HANDLE_NOTHING;
+
+	log(TERM, LOG_INFO, "Corrected Errors exceeded threshold %ld, try to offline cpu%d\n",
+	    threshold.value, cpu);
+	return do_cpu_offline(cpu);
+}
+
+/* Try to offline @cpu as soon as it has any uncorrected error on record. */
+static int do_uce_handler(unsigned cpu)
+{
+	if (cpu_infos[cpu].uce_nums == 0)
+		return HANDLE_NOTHING;
+	log(TERM, LOG_INFO, "Uncorrected Errors occured, try to offline cpu%d\n", cpu);
+	return do_cpu_offline(cpu);
+}
+
+/*
+ * Run the isolation policy that matches err_info->err_type for @cpu.
+ * Returns the handler's result (a value of enum error_handle_result);
+ * error types other than CE and UCE are ignored with HANDLE_NOTHING.
+ */
+static int error_handler(unsigned cpu, struct error_info *err_info)
+{
+	enum error_type kind = err_info->err_type;
+
+	/* corrected errors are judged against the configured threshold */
+	if (kind == CE)
+		return do_ce_handler(cpu);
+
+	/* a recorded uncorrected error leads straight to an offline attempt */
+	if (kind == UCE)
+		return do_uce_handler(cpu);
+	return HANDLE_NOTHING;
+}
+
+/*
+ * Book an error event against @cpu: a CE is queued with its timestamp and
+ * count and added to ce_nums, a UCE bumps uce_nums by one.
+ */
+static void record_error_info(unsigned cpu, struct error_info *err_info)
+{
+	struct cpu_info *info = &cpu_infos[cpu];
+	struct queue_node *sample;
+
+	if (err_info->err_type == UCE) {
+		info->uce_nums++;
+		return;
+	}
+	if (err_info->err_type != CE)
+		return;
+	sample = node_create(err_info->time, err_info->nums);
+	if (sample == NULL) {
+		log(TERM, LOG_ERR, "Fail to allocate memory for queue node\n");
+		return;
+	}
+	push(info->ce_queue, sample);
+	info->ce_nums += err_info->nums;
+}
+
+/*
+ * Extract the @size-bit field of @value starting at bit @offset, using
+ * unsigned arithmetic so sign bits and wide fields cannot misbehave.
+ */
+static unsigned long get_bit_value(int64_t value, unsigned offset, unsigned size)
+{
+	unsigned long long bits = (unsigned long long)value;
+	bits = offset < 64 ? bits >> offset : 0;
+	if (size < 64)
+		bits &= (1ULL << size) - 1;
+	return bits;
+}
+
+/*
+ * Map an MPIDR value to the cpu index used for cpu_infos[].
+ * Adapt to certain BIOS
+ * In the MPIDR:
+ * bit 8:15: core id
+ * bit 19:20: die_id
+ * bit 21:22: socket_id
+ */
+static unsigned get_cpu_index(int64_t mpidr)
+{
+	const unsigned core_id = get_bit_value(mpidr, 8, 8);
+	const unsigned die_id = get_bit_value(mpidr, 19, 2);
+	const unsigned socket_id = get_bit_value(mpidr, 21, 2);
+
+	/* socket and die ids are scaled by their respective cores-per-unit counts */
+	return core_id + socket_id * cores_per_socket + die_id * cores_per_die;
+}
+
+/*
+ * Entry point for a cpu error event: map @mpidr to a cpu, record the error
+ * and offline the cpu when the CE/UCE policy asks for it. Nothing happens
+ * if isolation is disabled, the index is out of range or the cpu is not
+ * online; once the offlined-cpu limit is hit the error is only recorded.
+ */
+void ras_record_cpu_error(struct error_info *err_info, int64_t mpidr)
+{
+	unsigned cpu;
+	int ret;
+
+	if (enabled == 0)
+		return;
+	cpu = get_cpu_index(mpidr);
+	if (cpu >= ncores) {
+		log(TERM, LOG_ERR, "The current cpu %d has exceed the total number of cpu:%d\n", cpu, ncores);
+		return;
+	}
+
+	log(TERM, LOG_INFO, "Handling error on cpu%d\n", cpu);
+	cpu_infos[cpu].state = get_cpu_status(cpu);
+	if (cpu_infos[cpu].state != CPU_ONLINE) {
+		log(TERM, LOG_INFO, "Cpu%d is not online or unknown, ignore\n", cpu);
+		return;
+	}
+
+	record_error_info(cpu, err_info);
+	/* Since user may change cpu state, we get current offlined cpu numbers every recording time. */
+	if (ncores - sysconf(_SC_NPROCESSORS_ONLN) >= cpu_limit.value) {
+		log(TERM, LOG_WARNING, "Offlined cpus have exceeded limit: %lu, choose to do nothing\n",
+		    cpu_limit.value);
+		return;
+	}
+
+	ret = error_handler(cpu, err_info);
+	if (ret == HANDLE_NOTHING) {
+		log(TERM, LOG_WARNING, "Doing nothing in the cpu%d\n", cpu);
+	} else if (ret == HANDLE_SUCCEED) {
+		log(TERM, LOG_INFO, "Offline cpu%d succeed, the state is %s\n",
+		    cpu, cpu_state[cpu_infos[cpu].state]);
+		/* reset the counters with the queue, or a re-onlined cpu is offlined on its next CE */
+		clear_queue(cpu_infos[cpu].ce_queue);
+		cpu_infos[cpu].ce_nums = 0;
+		cpu_infos[cpu].uce_nums = 0;
+	} else {
+		log(TERM, LOG_INFO, "Offline cpu%d fail, the state is %s\n",
+		    cpu, cpu_state[cpu_infos[cpu].state]);
+	}
+}
diff --git a/ras-cpu-isolation.h b/ras-cpu-isolation.h
new file mode 100644
index 0000000..a7d3fdb
--- /dev/null
+++ b/ras-cpu-isolation.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2021-2021. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+*/
+
+#ifndef __RAS_CPU_ISOLATION_H
+#define __RAS_CPU_ISOLATION_H
+#include <stdint.h>	/* int64_t, used by ras_record_cpu_error() */
+#include <time.h>	/* time_t, used by struct error_info */
+#include "queue.h"
+
+#define MAX_PATH_LEN 100	/* size of the sysfs path buffer */
+#define MAX_BUF_LEN 1024	/* size of one core_siblings_list read buffer */
+#define PEI_ERR_SIZE 32		/* bytes per ARM processor error information entry */
+#define FLAGS_SIZE 1		/* bytes in the Flags field of such an entry */
+struct param {	/* one unit suffix and the factor a parsed number is multiplied by */
+ char *name;	/* unit letter; a NULL name ends a unit table */
+ unsigned long value;	/* multiplier applied by parse_ul_config() */
+};
+
+struct isolation_param {
+ char *name;	/* environment variable the setting is read from */
+ const struct param *units;	/* accepted unit suffixes for this setting */
+ unsigned long value;	/* current setting; the preset value acts as default */
+ unsigned long limit;	/* upper bound that check_config() clamps value to */
+};
+
+enum cpu_state {
+ CPU_OFFLINE,	/* 0: matches the digit read from the sysfs online file */
+ CPU_ONLINE,	/* 1: matches the digit read from the sysfs online file */
+ CPU_OFFLINE_FAILED,	/* set by do_cpu_offline() before it attempts the write */
+ CPU_UNKNOWN,	/* state could not be read or was out of range */
+};
+
+enum error_handle_result {
+ HANDLE_FAILED = -1,	/* an offline attempt was made and did not succeed */
+ HANDLE_SUCCEED,	/* the cpu read back as offline after the attempt */
+ HANDLE_NOTHING,	/* the policy did not ask for an offline attempt */
+};
+
+enum error_type {	/* NOTE(review): ras-arm-handler.c stores the raw "sev" value here — presumably CE/UCE mirror GHES_SEV_CORRECTED/GHES_SEV_RECOVERABLE; confirm */
+ CE = 1,	/* handled by do_ce_handler() */
+ UCE	/* handled by do_uce_handler() */
+};
+
+struct cpu_info {
+ unsigned long uce_nums;	/* uncorrected errors recorded for this cpu */
+ unsigned long ce_nums;	/* sum of the CE counts currently held in ce_queue */
+ struct link_queue *ce_queue;	/* timestamped CE samples, oldest at the head */
+ enum cpu_state state;	/* last state obtained for this cpu */
+};
+
+struct error_info {
+ unsigned long nums;	/* number of errors carried by the event */
+ time_t time;	/* timestamp stored with a queued CE sample */
+ enum error_type err_type;	/* selects the CE or UCE handling path */
+};
+
+struct cpu_set {	/* one distinct core_siblings_list string collected by get_sockets() */
+ char buf[MAX_BUF_LEN];
+};
+
+void ras_error_count_init(unsigned cpus);	/* set up per-cpu state and read the configuration */
+void ras_record_cpu_error(struct error_info *err_info, int64_t mpidr);	/* record one error and apply the isolation policy */
+void cpu_infos_free(void);	/* release everything ras_error_count_init() allocated */
+
+#endif
\ No newline at end of file
diff --git a/ras-events.c b/ras-events.c
index ba769d1..00938e6 100644
--- a/ras-events.c
+++ b/ras-events.c
@@ -41,6 +41,7 @@
#include "ras-record.h"
#include "ras-logger.h"
#include "ras-page-isolation.h"
+#include "ras-cpu-isolation.h"
/*
* Polling time, if read() doesn't block. Currently, trace_pipe_raw never
@@ -879,6 +880,10 @@ int handle_ras_events(int record_events)
cpus = get_num_cpus(ras);
+#ifdef HAVE_CPU_FAULT_ISOLATION
+ ras_error_count_init(cpus);
+#endif
+
#ifdef HAVE_MCE
rc = register_mce_handler(ras, cpus);
if (rc)
@@ -1005,6 +1010,9 @@ err:
}
free(ras);
}
+#ifdef HAVE_CPU_FAULT_ISOLATION
+ cpu_infos_free();
+#endif
return rc;
}
diff --git a/ras-record.h b/ras-record.h
index d9f7733..efaffa5 100644
--- a/ras-record.h
+++ b/ras-record.h
@@ -83,6 +83,11 @@ struct ras_arm_event {
uint32_t ctx_len;
const uint8_t *vsei_error;
uint32_t oem_len;
+#ifdef HAVE_CPU_FAULT_ISOLATION
+ const char *severity;
+ const uint8_t *error_info;
+ uint32_t length;
+#endif
};
struct devlink_event {
--
2.27.0