rasdaemon/0004-rasdaemon-ras-mc-ctl-Modify-error-statistics-for-HiS.patch
Shiju Jose f9eb9d8c57 rasdaemon: Update with the latest patches for the CPU fault isolation, Hisilicon Kunpeng9xx common error records and improvements in the ras-mc-ctl for the Hisilicon Kunpeng9xx errors
Update with the latest patches for the following:
1. CPU online fault isolation for arm event.
2. Modify the recording of HiSilicon common error data in the rasdaemon.
3. In the ras-mc-ctl,
3.1. Improve Hisilicon common error statistics.
3.2. Add support to display the HiSilicon vendor-errors for a specified module.
3.3. Print the usage if the necessary parameters are not passed for the HiSilicon vendor-errors options.
3.4. Reformat error info of the HiSilicon Kunpeng920.
3.5. Relocate the reading and display of Kunpeng920 errors under Kunpeng9xx.
3.6. Update the HiSilicon platform name to KunPeng9xx.
4. Fix a memory out-of-bounds issue in the rasdaemon.

Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
2022-05-30 09:30:03 +01:00

98 lines
4.7 KiB
Diff

From 4f706ff3b1a04de3be506a309e153b99e04b3445 Mon Sep 17 00:00:00 2001
From: Shiju Jose <shiju.jose@huawei.com>
Date: Thu, 24 Feb 2022 18:02:14 +0000
Subject: [PATCH 04/10] rasdaemon: ras-mc-ctl: Modify error statistics for
HiSilicon KunPeng9xx common errors
Modify the error statistics for the HiSilicon KunPeng9xx platforms' common errors
to display the statistics and error info based on the module and the error severity.
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
util/ras-mc-ctl.in | 40 +++++++++++++++++++++++++++++-----------
1 file changed, 29 insertions(+), 11 deletions(-)
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in
index b22dd60..08eb287 100755
--- a/util/ras-mc-ctl.in
+++ b/util/ras-mc-ctl.in
@@ -1537,7 +1537,7 @@ sub vendor_errors_summary
require DBI;
my ($num_args, $platform_id);
my ($query, $query_handle, $count, $out);
- my ($module_id, $sub_module_id, $err_severity, $err_sev, $err_info);
+ my ($module_id, $sub_module_id, $err_severity, $err_sev);
$num_args = $#ARGV + 1;
$platform_id = 0;
@@ -1614,13 +1614,18 @@ sub vendor_errors_summary
# HiSilicon Kunpeng9xx common errors
if ($platform_id eq HISILICON_KUNPENG_9XX) {
- $query = "select err_info, count(*) from hisi_common_section";
+ $query = "select err_severity, module_id, count(*) from hisi_common_section_v2 group by err_severity, module_id";
$query_handle = $dbh->prepare($query);
$query_handle->execute();
- $query_handle->bind_columns(\($err_info, $count));
+ $query_handle->bind_columns(\($err_severity, $module_id, $count));
$out = "";
+ $err_sev = "";
while($query_handle->fetch()) {
- $out .= "\terrors: $count\n";
+ if ($err_severity ne $err_sev) {
+ $out .= "$err_severity errors:\n";
+ $err_sev = $err_severity;
+ }
+ $out .= "\t$module_id: $count\n";
}
if ($out ne "") {
print "HiSilicon Kunpeng9xx common error events summary:\n$out\n";
@@ -1638,8 +1643,8 @@ sub vendor_errors
require DBI;
my ($num_args, $platform_id);
my ($query, $query_handle, $id, $timestamp, $out);
- my ($version, $soc_id, $socket_id, $nimbus_id, $core_id, $port_id);
- my ($module_id, $sub_module_id, $err_severity, $err_type, $err_info, $regs);
+ my ($version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $core_id, $port_id);
+ my ($module_id, $sub_module_id, $err_severity, $err_type, $pcie_info, $regs);
$num_args = $#ARGV + 1;
$platform_id = 0;
@@ -1727,15 +1732,28 @@ sub vendor_errors
# HiSilicon Kunpeng9xx common errors
if ($platform_id eq HISILICON_KUNPENG_9XX) {
- $query = "select id, timestamp, err_info, regs_dump from hisi_common_section order by id";
+ $query = "select id, timestamp, version, soc_id, socket_id, totem_id, nimbus_id, sub_system_id, module_id, sub_module_id, core_id, port_id, err_type, pcie_info, err_severity, regs_dump from hisi_common_section_v2 order by id, module_id, err_severity";
$query_handle = $dbh->prepare($query);
$query_handle->execute();
- $query_handle->bind_columns(\($id, $timestamp, $err_info, $regs));
+ $query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $module_id, $sub_module_id, $core_id, $port_id, $err_type, $pcie_info, $err_severity, $regs));
$out = "";
while($query_handle->fetch()) {
- $out .= "$id. $timestamp ";
- $out .= "Error Info:$err_info \n" if ($err_info);
- $out .= "Error Registers: $regs\n\n" if ($regs);
+ $out .= "$id. $timestamp Error Info: ";
+ $out .= "version=$version, ";
+ $out .= "soc_id=$soc_id, " if ($soc_id);
+ $out .= "socket_id=$socket_id, " if ($socket_id);
+ $out .= "totem_id=$totem_id, " if ($totem_id);
+ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
+ $out .= "sub_system_id=$sub_system_id, " if ($sub_system_id);
+ $out .= "module_id=$module_id, " if ($module_id);
+ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
+ $out .= "core_id=$core_id, " if ($core_id);
+ $out .= "port_id=$port_id, " if ($port_id);
+ $out .= "err_type=$err_type, " if ($err_type);
+ $out .= "pcie_info=$pcie_info, " if ($pcie_info);
+ $out .= "err_severity=$err_severity, " if ($err_severity);
+ $out .= "Error Registers: $regs" if ($regs);
+ $out .= "\n\n";
}
if ($out ne "") {
print "HiSilicon Kunpeng9xx common error events:\n$out\n";
--
2.25.1