!31 同步sp2分支到master

From: @Lostwayzxc
Reviewed-by: @openeuler-basic
Signed-off-by: @openeuler-basic
This commit is contained in:
openeuler-ci-bot 2021-10-27 08:44:48 +00:00 committed by Gitee
commit a5be32ad0f
4 changed files with 1175 additions and 1 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,78 @@
From 57640072aead2e00037749d66f05fc26e3fe3071 Mon Sep 17 00:00:00 2001
From: Lostwayzxc <luoshengwei@huawei.com>
Date: Tue, 25 May 2021 20:07:26 +0800
Subject: [PATCH 2/2] add trace print of new information and add it to sqlite
Since new information has been added to the event, print it in the trace
output and store it in SQLite.
Signed-off-by: Luo Shengwei <luoshengwei@huawei.com>
---
ras-arm-handler.c | 10 ++++++++++
ras-record.c | 8 ++++++++
2 files changed, 18 insertions(+)
diff --git a/ras-arm-handler.c b/ras-arm-handler.c
index 10d0099..23ad470 100644
--- a/ras-arm-handler.c
+++ b/ras-arm-handler.c
@@ -23,6 +23,13 @@
#include "ras-cpu-isolation.h"
#ifdef HAVE_CPU_FAULT_ISOLATION
+static void trace_print_hex(struct trace_seq *s, const uint8_t *buf, int buf_len)
+{
+ for (int i = 0; i < buf_len; ++i) {
+ trace_seq_printf(s, "%2.2x", buf[i]);
+ }
+}
+
static int is_core_failure(unsigned long value)
{
/*
@@ -135,6 +142,7 @@ int ras_arm_event_handler(struct trace_seq *s,
case GHES_SEV_PANIC:
ev.severity = "Fatal";
}
+ trace_seq_printf(s, "\n severity: %s", ev.severity);
if (val == GHES_SEV_CORRECTED || val == GHES_SEV_RECOVERABLE) {
int len, nums;
@@ -142,6 +150,8 @@ int ras_arm_event_handler(struct trace_seq *s,
if (!ev.error_info)
return -1;
ev.length = len;
+ trace_seq_printf(s, "\n processor_err_info: ");
+ trace_print_hex(s, ev.error_info, len);
/* relate to enum error_type */
nums = count_errors(event, ev.error_info, len);
if (nums > 0) {
diff --git a/ras-record.c b/ras-record.c
index 549c494..33d4741 100644
--- a/ras-record.c
+++ b/ras-record.c
@@ -210,6 +210,10 @@ static const struct db_fields arm_event_fields[] = {
{ .name="mpidr", .type="INTEGER" },
{ .name="running_state", .type="INTEGER" },
{ .name="psci_state", .type="INTEGER" },
+#ifdef HAVE_CPU_FAULT_ISOLATION
+ { .name="severity", .type="TEXT" },
+ { .name="error_info", .type="BLOB" },
+#endif
};
static const struct db_table_descriptor arm_event_tab = {
@@ -233,6 +237,10 @@ int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev)
sqlite3_bind_int64 (priv->stmt_arm_record, 4, ev->mpidr);
sqlite3_bind_int (priv->stmt_arm_record, 5, ev->running_state);
sqlite3_bind_int (priv->stmt_arm_record, 6, ev->psci_state);
+#ifdef HAVE_CPU_FAULT_ISOLATION
+ sqlite3_bind_text (priv->stmt_arm_record, 7, ev->severity, -1, NULL);
+ sqlite3_bind_blob (priv->stmt_arm_record, 8, ev->error_info, ev->length, NULL);
+#endif
rc = sqlite3_step(priv->stmt_arm_record);
if (rc != SQLITE_OK && rc != SQLITE_DONE)
--
2.27.0

View File

@ -0,0 +1,60 @@
From 6b767a2fce615384f062ecb392cd332452bf4482 Mon Sep 17 00:00:00 2001
From: Lostwayzxc <luoshengwei@huawei.com>
Date: Wed, 1 Sep 2021 21:00:16 +0800
Subject: [PATCH] modify cpu parse for adapting to new bios version
---
ras-cpu-isolation.c | 20 ++++++++++++++++++--
1 file changed, 18 insertions(+), 2 deletions(-)
diff --git a/ras-cpu-isolation.c b/ras-cpu-isolation.c
index 6dcff70..b1643c4 100644
--- a/ras-cpu-isolation.c
+++ b/ras-cpu-isolation.c
@@ -25,6 +25,7 @@
static struct cpu_info *cpu_infos = NULL;
static unsigned int ncores, cores_per_socket, cores_per_die;
+static unsigned int cores_per_cluster = 4;
static unsigned int sockets, dies = 1;
static unsigned int enabled = 1;
static const char *cpu_path_format = "/sys/devices/system/cpu/cpu%d/online";
@@ -432,18 +433,33 @@ static unsigned long get_bit_value(int64_t value, unsigned offset, unsigned size
static unsigned get_cpu_index(int64_t mpidr)
{
- unsigned core_id, socket_id, die_id, cpu;
+ unsigned core_id, cluster_id, socket_id, die_id, cpu;
/*
* Adapt to certain BIOS
* In the MPIDR:
* bit 8:15: core id
+ * bit 16:18: cluster id
* bit 19:20: die_id
* bit 21:22: socket_id
*/
core_id = get_bit_value(mpidr, 8, 8);
+ cluster_id = get_bit_value(mpidr, 16, 3);
socket_id = get_bit_value(mpidr, 21, 2);
die_id = get_bit_value(mpidr, 19, 2);
- cpu = core_id + socket_id * cores_per_socket + die_id * cores_per_die;
+
+ /* A die id of 1 parsed from the MPIDR means TotemA, and a die id of 3
+ * means TotemB. When cores per die equals cores per socket, the socket
+ * has only one die; since that single die may be TotemB (as in CPU 1620s),
+ * we set the die id to 0 directly.
+ */
+ if (cores_per_die == cores_per_socket) {
+ die_id = 0;
+ }
+ else {
+ die_id = (die_id == 1 ? 0:1);
+ }
+ cpu = core_id + socket_id * cores_per_socket + die_id * cores_per_die +
+ cluster_id * cores_per_cluster;
return cpu;
}
--
2.27.0

View File

@ -1,6 +1,6 @@
Name: rasdaemon
Version: 0.6.6
Release: 7
Release: 8
License: GPLv2
Summary: Utility to get Platform Reliability, Availability and Serviceability (RAS) reports via the Kernel tracing events
URL: https://github.com/mchehab/rasdaemon.git
@ -34,6 +34,9 @@ Patch12: 0001-rasdaemon-Fix-the-issue-of-sprintf-data-type-mismatc.patch
Patch13: 0002-rasdaemon-Fix-the-issue-of-command-option-r-for-hip0.patch
Patch14: 0003-rasdaemon-Fix-some-print-format-issues-for-hisi-comm.patch
Patch15: 0004-rasdaemon-Add-some-modules-supported-by-hisi-common-.patch
Patch16: 0006-add-cpu-online-fault-isolation.patch
Patch17: 0007-add-trace-print-and-add-sqlite-store.patch
Patch18: 0008-modify-cpu-parse-for-adapting-to-new-bios-version.patch
%description
The rasdaemon program is a daemon which monitors the platform
@ -80,6 +83,12 @@ rm INSTALL %{buildroot}/usr/include/*.h
/usr/bin/systemctl enable rasdaemon.service >/dev/null 2>&1 || :
%changelog
* Wed Oct 27 2021 luoshengwei<luoshengwei@huawei.com> - 0.6.6-8
- Type:feature
- ID:NA
- SUG:NA
- DESC: Sync three patches, add cpu online fault isolation.
* Wed Oct 20 2021 tanxiaofei<tanxiaofei@huawei.com> - 0.6.6-7
- Type:Bugfix
- ID:NA