sync three patches from sp2
This commit is contained in:
parent
3f52aad34c
commit
130f7d6303
1027
0006-add-cpu-online-fault-isolation.patch
Normal file
1027
0006-add-cpu-online-fault-isolation.patch
Normal file
File diff suppressed because it is too large
Load Diff
78
0007-add-trace-print-and-add-sqlite-store.patch
Normal file
78
0007-add-trace-print-and-add-sqlite-store.patch
Normal file
@ -0,0 +1,78 @@
|
||||
From 57640072aead2e00037749d66f05fc26e3fe3071 Mon Sep 17 00:00:00 2001
|
||||
From: Lostwayzxc <luoshengwei@huawei.com>
|
||||
Date: Tue, 25 May 2021 20:07:26 +0800
|
||||
Subject: [PATCH 2/2] add trace print of new information and add it to sqlite
|
||||
|
||||
Since new information was added to the event, print it in the trace output and store it in
|
||||
Sqlite.
|
||||
|
||||
Signed-off-by: Luo Shengwei <luoshengwei@huawei.com>
|
||||
---
|
||||
ras-arm-handler.c | 10 ++++++++++
|
||||
ras-record.c | 8 ++++++++
|
||||
2 files changed, 18 insertions(+)
|
||||
|
||||
diff --git a/ras-arm-handler.c b/ras-arm-handler.c
|
||||
index 10d0099..23ad470 100644
|
||||
--- a/ras-arm-handler.c
|
||||
+++ b/ras-arm-handler.c
|
||||
@@ -23,6 +23,13 @@
|
||||
#include "ras-cpu-isolation.h"
|
||||
|
||||
#ifdef HAVE_CPU_FAULT_ISOLATION
|
||||
+static void trace_print_hex(struct trace_seq *s, const uint8_t *buf, int buf_len)
|
||||
+{
|
||||
+ for (int i = 0; i < buf_len; ++i) {
|
||||
+ trace_seq_printf(s, "%2.2x", buf[i]);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static int is_core_failure(unsigned long value)
|
||||
{
|
||||
/*
|
||||
@@ -135,6 +142,7 @@ int ras_arm_event_handler(struct trace_seq *s,
|
||||
case GHES_SEV_PANIC:
|
||||
ev.severity = "Fatal";
|
||||
}
|
||||
+ trace_seq_printf(s, "\n severity: %s", ev.severity);
|
||||
|
||||
if (val == GHES_SEV_CORRECTED || val == GHES_SEV_RECOVERABLE) {
|
||||
int len, nums;
|
||||
@@ -142,6 +150,8 @@ int ras_arm_event_handler(struct trace_seq *s,
|
||||
if (!ev.error_info)
|
||||
return -1;
|
||||
ev.length = len;
|
||||
+ trace_seq_printf(s, "\n processor_err_info: ");
|
||||
+ trace_print_hex(s, ev.error_info, len);
|
||||
/* relate to enum error_type */
|
||||
nums = count_errors(event, ev.error_info, len);
|
||||
if (nums > 0) {
|
||||
diff --git a/ras-record.c b/ras-record.c
|
||||
index 549c494..33d4741 100644
|
||||
--- a/ras-record.c
|
||||
+++ b/ras-record.c
|
||||
@@ -210,6 +210,10 @@ static const struct db_fields arm_event_fields[] = {
|
||||
{ .name="mpidr", .type="INTEGER" },
|
||||
{ .name="running_state", .type="INTEGER" },
|
||||
{ .name="psci_state", .type="INTEGER" },
|
||||
+#ifdef HAVE_CPU_FAULT_ISOLATION
|
||||
+ { .name="severity", .type="TEXT" },
|
||||
+ { .name="error_info", .type="BLOB" },
|
||||
+#endif
|
||||
};
|
||||
|
||||
static const struct db_table_descriptor arm_event_tab = {
|
||||
@@ -233,6 +237,10 @@ int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev)
|
||||
sqlite3_bind_int64 (priv->stmt_arm_record, 4, ev->mpidr);
|
||||
sqlite3_bind_int (priv->stmt_arm_record, 5, ev->running_state);
|
||||
sqlite3_bind_int (priv->stmt_arm_record, 6, ev->psci_state);
|
||||
+#ifdef HAVE_CPU_FAULT_ISOLATION
|
||||
+ sqlite3_bind_text (priv->stmt_arm_record, 7, ev->severity, -1, NULL);
|
||||
+ sqlite3_bind_blob (priv->stmt_arm_record, 8, ev->error_info, ev->length, NULL);
|
||||
+#endif
|
||||
|
||||
rc = sqlite3_step(priv->stmt_arm_record);
|
||||
if (rc != SQLITE_OK && rc != SQLITE_DONE)
|
||||
--
|
||||
2.27.0
|
||||
|
||||
60
0008-modify-cpu-parse-for-adapting-to-new-bios-version.patch
Normal file
60
0008-modify-cpu-parse-for-adapting-to-new-bios-version.patch
Normal file
@ -0,0 +1,60 @@
|
||||
From 6b767a2fce615384f062ecb392cd332452bf4482 Mon Sep 17 00:00:00 2001
|
||||
From: Lostwayzxc <luoshengwei@huawei.com>
|
||||
Date: Wed, 1 Sep 2021 21:00:16 +0800
|
||||
Subject: [PATCH] modify cpu parse for adapting to new bios version
|
||||
|
||||
---
|
||||
ras-cpu-isolation.c | 20 ++++++++++++++++++--
|
||||
1 file changed, 18 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/ras-cpu-isolation.c b/ras-cpu-isolation.c
|
||||
index 6dcff70..b1643c4 100644
|
||||
--- a/ras-cpu-isolation.c
|
||||
+++ b/ras-cpu-isolation.c
|
||||
@@ -25,6 +25,7 @@
|
||||
|
||||
static struct cpu_info *cpu_infos = NULL;
|
||||
static unsigned int ncores, cores_per_socket, cores_per_die;
|
||||
+static unsigned int cores_per_cluster = 4;
|
||||
static unsigned int sockets, dies = 1;
|
||||
static unsigned int enabled = 1;
|
||||
static const char *cpu_path_format = "/sys/devices/system/cpu/cpu%d/online";
|
||||
@@ -432,18 +433,33 @@ static unsigned long get_bit_value(int64_t value, unsigned offset, unsigned size
|
||||
|
||||
static unsigned get_cpu_index(int64_t mpidr)
|
||||
{
|
||||
- unsigned core_id, socket_id, die_id, cpu;
|
||||
+ unsigned core_id, cluster_id, socket_id, die_id, cpu;
|
||||
/*
|
||||
* Adapt to certain BIOS
|
||||
* In the MPIDR:
|
||||
* bit 8:15: core id
|
||||
+ * bit 16:18: cluster id
|
||||
* bit 19:20: die_id
|
||||
* bit 21:22: socket_id
|
||||
*/
|
||||
core_id = get_bit_value(mpidr, 8, 8);
|
||||
+ cluster_id = get_bit_value(mpidr, 16, 3);
|
||||
socket_id = get_bit_value(mpidr, 21, 2);
|
||||
die_id = get_bit_value(mpidr, 19, 2);
|
||||
- cpu = core_id + socket_id * cores_per_socket + die_id * cores_per_die;
|
||||
+
|
||||
+ /* When die id parsed from MPIDR is 1, it means TotemA, and when it's 3,
|
||||
+ * it means TotemB. When cores per die equal to cores per socket, it means
|
||||
+ * that there is only one die in the socket, in case that the only die is
|
||||
+ * TotemB in CPU 1620s, we set die id to 0 directly.
|
||||
+ */
|
||||
+ if (cores_per_die == cores_per_socket) {
|
||||
+ die_id = 0;
|
||||
+ }
|
||||
+ else {
|
||||
+ die_id = (die_id == 1 ? 0:1);
|
||||
+ }
|
||||
+ cpu = core_id + socket_id * cores_per_socket + die_id * cores_per_die +
|
||||
+ cluster_id * cores_per_cluster;
|
||||
|
||||
return cpu;
|
||||
}
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
Name: rasdaemon
|
||||
Version: 0.6.6
|
||||
Release: 7
|
||||
Release: 8
|
||||
License: GPLv2
|
||||
Summary: Utility to get Platform Reliability, Availability and Serviceability (RAS) reports via the Kernel tracing events
|
||||
URL: https://github.com/mchehab/rasdaemon.git
|
||||
@ -34,6 +34,9 @@ Patch12: 0001-rasdaemon-Fix-the-issue-of-sprintf-data-type-mismatc.patch
|
||||
Patch13: 0002-rasdaemon-Fix-the-issue-of-command-option-r-for-hip0.patch
|
||||
Patch14: 0003-rasdaemon-Fix-some-print-format-issues-for-hisi-comm.patch
|
||||
Patch15: 0004-rasdaemon-Add-some-modules-supported-by-hisi-common-.patch
|
||||
Patch16: 0006-add-cpu-online-fault-isolation.patch
|
||||
Patch17: 0007-add-trace-print-and-add-sqlite-store.patch
|
||||
Patch18: 0008-modify-cpu-parse-for-adapting-to-new-bios-version.patch
|
||||
|
||||
%description
|
||||
The rasdaemon program is a daemon which monitors the platform
|
||||
@ -80,6 +83,12 @@ rm INSTALL %{buildroot}/usr/include/*.h
|
||||
/usr/bin/systemctl enable rasdaemon.service >/dev/null 2>&1 || :
|
||||
|
||||
%changelog
|
||||
* Wed Oct 27 2021 luoshengwei<luoshengwei@huawei.com> - 0.6.6-8
|
||||
- Type:feature
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Sync three patches, add cpu online fault isolation.
|
||||
|
||||
* Wed Oct 20 2021 tanxiaofei<tanxiaofei@huawei.com> - 0.6.6-7
|
||||
- Type:Bugfix
|
||||
- ID:NA
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user