rasdaemon/0003-rasdaemon-Modify-recording-Hisilicon-common-error-da.patch
Shiju Jose f9eb9d8c57 rasdaemon: Update with the latest patches for the CPU fault isolation, Hisilicon Kunpeng9xx common error records and improvements in the ras-mc-ctl for the Hisilicon Kunpeng9xx errors
Update with the latest patches for the following:
1. CPU online fault isolation for arm event.
2. Modify the recording of Hisilicon common error data in the rasdaemon.
3. In the ras-mc-ctl,
3.1. Improve Hisilicon common error statistics.
3.2. Add support to display the HiSilicon vendor-errors for a specified module.
3.3. Print the usage if the necessary parameters are not passed for the HiSilicon vendor-errors options.
3.4. Reformat error info of the HiSilicon Kunpeng920.
3.5. Relocate the reading and display of Kunpeng920 errors under Kunpeng9xx.
3.6. Update the HiSilicon platform name to KunPeng9xx.
4. Fix a memory out-of-bounds issue in the rasdaemon.

Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
2022-05-30 09:30:03 +01:00

229 lines
8.5 KiB
Diff

From 9c4665f33c39ea84db7d69079ab27205d2fbd07e Mon Sep 17 00:00:00 2001
From: Shiju Jose <shiju.jose@huawei.com>
Date: Wed, 2 Mar 2022 12:20:40 +0000
Subject: [PATCH 03/10] rasdaemon: Modify recording Hisilicon common error data
The error statistics for the Hisilicon common
error need to be computed based on module, error severity, etc.
Record the Hisilicon common error data as separate fields
in the SQL DB table instead of a single combined field.
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
non-standard-hisilicon.c | 126 ++++++++++++++++++++++++++++++++-------
1 file changed, 104 insertions(+), 22 deletions(-)
diff --git a/non-standard-hisilicon.c b/non-standard-hisilicon.c
index 1432163..d1e1774 100644
--- a/non-standard-hisilicon.c
+++ b/non-standard-hisilicon.c
@@ -17,6 +17,7 @@
#include "non-standard-hisilicon.h"
#define HISI_BUF_LEN 2048
+#define HISI_PCIE_INFO_BUF_LEN 256
struct hisi_common_error_section {
uint32_t val_bits;
@@ -63,12 +64,25 @@ enum {
enum {
HISI_COMMON_FIELD_ID,
HISI_COMMON_FIELD_TIMESTAMP,
- HISI_COMMON_FIELD_ERR_INFO,
+ HISI_COMMON_FIELD_VERSION,
+ HISI_COMMON_FIELD_SOC_ID,
+ HISI_COMMON_FIELD_SOCKET_ID,
+ HISI_COMMON_FIELD_TOTEM_ID,
+ HISI_COMMON_FIELD_NIMBUS_ID,
+ HISI_COMMON_FIELD_SUB_SYSTEM_ID,
+ HISI_COMMON_FIELD_MODULE_ID,
+ HISI_COMMON_FIELD_SUB_MODULE_ID,
+ HISI_COMMON_FIELD_CORE_ID,
+ HISI_COMMON_FIELD_PORT_ID,
+ HISI_COMMON_FIELD_ERR_TYPE,
+ HISI_COMMON_FIELD_PCIE_INFO,
+ HISI_COMMON_FIELD_ERR_SEVERITY,
HISI_COMMON_FIELD_REGS_DUMP,
};
struct hisi_event {
char error_msg[HISI_BUF_LEN];
+ char pcie_info[HISI_PCIE_INFO_BUF_LEN];
char reg_msg[HISI_BUF_LEN];
};
@@ -132,14 +146,26 @@ int step_vendor_data_tab(struct ras_ns_ev_decoder *ev_decoder, const char *name)
#ifdef HAVE_SQLITE3
static const struct db_fields hisi_common_section_fields[] = {
- { .name = "id", .type = "INTEGER PRIMARY KEY" },
- { .name = "timestamp", .type = "TEXT" },
- { .name = "err_info", .type = "TEXT" },
+ { .name = "id", .type = "INTEGER PRIMARY KEY" },
+ { .name = "timestamp", .type = "TEXT" },
+ { .name = "version", .type = "INTEGER" },
+ { .name = "soc_id", .type = "INTEGER" },
+ { .name = "socket_id", .type = "INTEGER" },
+ { .name = "totem_id", .type = "INTEGER" },
+ { .name = "nimbus_id", .type = "INTEGER" },
+ { .name = "sub_system_id", .type = "INTEGER" },
+ { .name = "module_id", .type = "TEXT" },
+ { .name = "sub_module_id", .type = "INTEGER" },
+ { .name = "core_id", .type = "INTEGER" },
+ { .name = "port_id", .type = "INTEGER" },
+ { .name = "err_type", .type = "INTEGER" },
+ { .name = "pcie_info", .type = "TEXT" },
+ { .name = "err_severity", .type = "TEXT" },
{ .name = "regs_dump", .type = "TEXT" },
};
static const struct db_table_descriptor hisi_common_section_tab = {
- .name = "hisi_common_section",
+ .name = "hisi_common_section_v2",
.fields = hisi_common_section_fields,
.num_fields = ARRAY_SIZE(hisi_common_section_fields),
};
@@ -199,12 +225,20 @@ static const char* get_soc_desc(uint8_t soc_id)
return soc_desc[soc_id];
}
-static void decode_module(struct hisi_event *event, uint8_t module_id)
+static void decode_module(struct ras_ns_ev_decoder *ev_decoder,
+ struct hisi_event *event, uint8_t module_id)
{
- if (module_id >= sizeof(module_name)/sizeof(char *))
+ if (module_id >= sizeof(module_name)/sizeof(char *)) {
HISI_SNPRINTF(event->error_msg, "module=unknown(id=%hhu) ", module_id);
- else
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT,
+ HISI_COMMON_FIELD_MODULE_ID,
+ 0, "unknown");
+ } else {
HISI_SNPRINTF(event->error_msg, "module=%s ", module_name[module_id]);
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT,
+ HISI_COMMON_FIELD_MODULE_ID,
+ 0, module_name[module_id]);
+ }
}
static void decode_hisi_common_section_hdr(struct ras_ns_ev_decoder *ev_decoder,
@@ -212,43 +246,93 @@ static void decode_hisi_common_section_hdr(struct ras_ns_ev_decoder *ev_decoder,
struct hisi_event *event)
{
HISI_SNPRINTF(event->error_msg, "[ table_version=%hhu", err->version);
- if (err->val_bits & BIT(HISI_COMMON_VALID_SOC_ID))
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
+ HISI_COMMON_FIELD_VERSION,
+ err->version, NULL);
+ if (err->val_bits & BIT(HISI_COMMON_VALID_SOC_ID)) {
HISI_SNPRINTF(event->error_msg, "soc=%s", get_soc_desc(err->soc_id));
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
+ HISI_COMMON_FIELD_SOC_ID,
+ err->soc_id, NULL);
+ }
- if (err->val_bits & BIT(HISI_COMMON_VALID_SOCKET_ID))
+ if (err->val_bits & BIT(HISI_COMMON_VALID_SOCKET_ID)) {
HISI_SNPRINTF(event->error_msg, "socket_id=%hhu", err->socket_id);
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
+ HISI_COMMON_FIELD_SOCKET_ID,
+ err->socket_id, NULL);
+ }
- if (err->val_bits & BIT(HISI_COMMON_VALID_TOTEM_ID))
+ if (err->val_bits & BIT(HISI_COMMON_VALID_TOTEM_ID)) {
HISI_SNPRINTF(event->error_msg, "totem_id=%hhu", err->totem_id);
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
+ HISI_COMMON_FIELD_TOTEM_ID,
+ err->totem_id, NULL);
+ }
- if (err->val_bits & BIT(HISI_COMMON_VALID_NIMBUS_ID))
+ if (err->val_bits & BIT(HISI_COMMON_VALID_NIMBUS_ID)) {
HISI_SNPRINTF(event->error_msg, "nimbus_id=%hhu", err->nimbus_id);
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
+ HISI_COMMON_FIELD_NIMBUS_ID,
+ err->nimbus_id, NULL);
+ }
- if (err->val_bits & BIT(HISI_COMMON_VALID_SUBSYSTEM_ID))
+ if (err->val_bits & BIT(HISI_COMMON_VALID_SUBSYSTEM_ID)) {
HISI_SNPRINTF(event->error_msg, "subsystem_id=%hhu", err->subsystem_id);
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
+ HISI_COMMON_FIELD_SUB_SYSTEM_ID,
+ err->subsystem_id, NULL);
+ }
if (err->val_bits & BIT(HISI_COMMON_VALID_MODULE_ID))
- decode_module(event, err->module_id);
+ decode_module(ev_decoder, event, err->module_id);
- if (err->val_bits & BIT(HISI_COMMON_VALID_SUBMODULE_ID))
+ if (err->val_bits & BIT(HISI_COMMON_VALID_SUBMODULE_ID)) {
HISI_SNPRINTF(event->error_msg, "submodule_id=%hhu", err->submodule_id);
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
+ HISI_COMMON_FIELD_SUB_MODULE_ID,
+ err->submodule_id, NULL);
+ }
- if (err->val_bits & BIT(HISI_COMMON_VALID_CORE_ID))
+ if (err->val_bits & BIT(HISI_COMMON_VALID_CORE_ID)) {
HISI_SNPRINTF(event->error_msg, "core_id=%hhu", err->core_id);
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
+ HISI_COMMON_FIELD_CORE_ID,
+ err->core_id, NULL);
+ }
- if (err->val_bits & BIT(HISI_COMMON_VALID_PORT_ID))
+ if (err->val_bits & BIT(HISI_COMMON_VALID_PORT_ID)) {
HISI_SNPRINTF(event->error_msg, "port_id=%hhu", err->port_id);
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
+ HISI_COMMON_FIELD_PORT_ID,
+ err->port_id, NULL);
+ }
- if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_TYPE))
+ if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_TYPE)) {
HISI_SNPRINTF(event->error_msg, "err_type=%hu", err->err_type);
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT,
+ HISI_COMMON_FIELD_ERR_TYPE,
+ err->err_type, NULL);
+ }
- if (err->val_bits & BIT(HISI_COMMON_VALID_PCIE_INFO))
+ if (err->val_bits & BIT(HISI_COMMON_VALID_PCIE_INFO)) {
HISI_SNPRINTF(event->error_msg, "pcie_device_id=%04x:%02x:%02x.%x",
err->pcie_info.segment, err->pcie_info.bus,
err->pcie_info.device, err->pcie_info.function);
+ HISI_SNPRINTF(event->pcie_info, "%04x:%02x:%02x.%x",
+ err->pcie_info.segment, err->pcie_info.bus,
+ err->pcie_info.device, err->pcie_info.function);
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT,
+ HISI_COMMON_FIELD_PCIE_INFO,
+ 0, event->pcie_info);
+ }
- if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_SEVERITY))
+ if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_SEVERITY)) {
HISI_SNPRINTF(event->error_msg, "err_severity=%s", err_severity(err->err_severity));
+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT,
+ HISI_COMMON_FIELD_ERR_SEVERITY,
+ 0, err_severity(err->err_severity));
+ }
HISI_SNPRINTF(event->error_msg, "]");
}
@@ -293,8 +377,6 @@ static int decode_hisi_common_section(struct ras_events *ras,
record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT,
HISI_COMMON_FIELD_TIMESTAMP,
0, event->timestamp);
- record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT,
- HISI_COMMON_FIELD_ERR_INFO, 0, hevent.error_msg);
record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT,
HISI_COMMON_FIELD_REGS_DUMP, 0, hevent.reg_msg);
step_vendor_data_tab(ev_decoder, "hisi_common_section_tab");
--
2.25.1