Update with the latest patches for the following: 1. CPU online fault isolation for arm events. 2. Modify the recording of HiSilicon common error data in the rasdaemon. 3. In ras-mc-ctl: 3.1. Improve HiSilicon common error statistics. 3.2. Add support to display the HiSilicon vendor-errors for a specified module. 3.3. Print usage if the necessary parameters are not passed for the HiSilicon vendor-errors options. 3.4. Reformat error info of the HiSilicon Kunpeng920. 3.5. Relocate reading and displaying Kunpeng920 errors to under Kunpeng9xx. 3.6. Update the HiSilicon platform name to KunPeng9xx. 4. Fix a memory out-of-bounds issue in the rasdaemon. Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
98 lines
4.7 KiB
Diff
98 lines
4.7 KiB
Diff
From 4f706ff3b1a04de3be506a309e153b99e04b3445 Mon Sep 17 00:00:00 2001
|
|
From: Shiju Jose <shiju.jose@huawei.com>
|
|
Date: Thu, 24 Feb 2022 18:02:14 +0000
|
|
Subject: [PATCH 04/10] rasdaemon: ras-mc-ctl: Modify error statistics for
|
|
HiSilicon KunPeng9xx common errors
|
|
|
|
Modify the error statistics for the HiSilicon KunPeng9xx platforms' common errors
|
|
to display the statistics and error info based on the module and the error severity.
|
|
|
|
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
|
|
---
|
|
util/ras-mc-ctl.in | 40 +++++++++++++++++++++++++++++-----------
|
|
1 file changed, 29 insertions(+), 11 deletions(-)
|
|
|
|
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in
|
|
index b22dd60..08eb287 100755
|
|
--- a/util/ras-mc-ctl.in
|
|
+++ b/util/ras-mc-ctl.in
|
|
@@ -1537,7 +1537,7 @@ sub vendor_errors_summary
|
|
require DBI;
|
|
my ($num_args, $platform_id);
|
|
my ($query, $query_handle, $count, $out);
|
|
- my ($module_id, $sub_module_id, $err_severity, $err_sev, $err_info);
|
|
+ my ($module_id, $sub_module_id, $err_severity, $err_sev);
|
|
|
|
$num_args = $#ARGV + 1;
|
|
$platform_id = 0;
|
|
@@ -1614,13 +1614,18 @@ sub vendor_errors_summary
|
|
|
|
# HiSilicon Kunpeng9xx common errors
|
|
if ($platform_id eq HISILICON_KUNPENG_9XX) {
|
|
- $query = "select err_info, count(*) from hisi_common_section";
|
|
+ $query = "select err_severity, module_id, count(*) from hisi_common_section_v2 group by err_severity, module_id";
|
|
$query_handle = $dbh->prepare($query);
|
|
$query_handle->execute();
|
|
- $query_handle->bind_columns(\($err_info, $count));
|
|
+ $query_handle->bind_columns(\($err_severity, $module_id, $count));
|
|
$out = "";
|
|
+ $err_sev = "";
|
|
while($query_handle->fetch()) {
|
|
- $out .= "\terrors: $count\n";
|
|
+ if ($err_severity ne $err_sev) {
|
|
+ $out .= "$err_severity errors:\n";
|
|
+ $err_sev = $err_severity;
|
|
+ }
|
|
+ $out .= "\t$module_id: $count\n";
|
|
}
|
|
if ($out ne "") {
|
|
print "HiSilicon Kunpeng9xx common error events summary:\n$out\n";
|
|
@@ -1638,8 +1643,8 @@ sub vendor_errors
|
|
require DBI;
|
|
my ($num_args, $platform_id);
|
|
my ($query, $query_handle, $id, $timestamp, $out);
|
|
- my ($version, $soc_id, $socket_id, $nimbus_id, $core_id, $port_id);
|
|
- my ($module_id, $sub_module_id, $err_severity, $err_type, $err_info, $regs);
|
|
+ my ($version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $core_id, $port_id);
|
|
+ my ($module_id, $sub_module_id, $err_severity, $err_type, $pcie_info, $regs);
|
|
|
|
$num_args = $#ARGV + 1;
|
|
$platform_id = 0;
|
|
@@ -1727,15 +1732,28 @@ sub vendor_errors
|
|
|
|
# HiSilicon Kunpeng9xx common errors
|
|
if ($platform_id eq HISILICON_KUNPENG_9XX) {
|
|
- $query = "select id, timestamp, err_info, regs_dump from hisi_common_section order by id";
|
|
+ $query = "select id, timestamp, version, soc_id, socket_id, totem_id, nimbus_id, sub_system_id, module_id, sub_module_id, core_id, port_id, err_type, pcie_info, err_severity, regs_dump from hisi_common_section_v2 order by id, module_id, err_severity";
|
|
$query_handle = $dbh->prepare($query);
|
|
$query_handle->execute();
|
|
- $query_handle->bind_columns(\($id, $timestamp, $err_info, $regs));
|
|
+ $query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $module_id, $sub_module_id, $core_id, $port_id, $err_type, $pcie_info, $err_severity, $regs));
|
|
$out = "";
|
|
while($query_handle->fetch()) {
|
|
- $out .= "$id. $timestamp ";
|
|
- $out .= "Error Info:$err_info \n" if ($err_info);
|
|
- $out .= "Error Registers: $regs\n\n" if ($regs);
|
|
+ $out .= "$id. $timestamp Error Info: ";
|
|
+ $out .= "version=$version, ";
|
|
+ $out .= "soc_id=$soc_id, " if ($soc_id);
|
|
+ $out .= "socket_id=$socket_id, " if ($socket_id);
|
|
+ $out .= "totem_id=$totem_id, " if ($totem_id);
|
|
+ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
|
|
+ $out .= "sub_system_id=$sub_system_id, " if ($sub_system_id);
|
|
+ $out .= "module_id=$module_id, " if ($module_id);
|
|
+ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
|
|
+ $out .= "core_id=$core_id, " if ($core_id);
|
|
+ $out .= "port_id=$port_id, " if ($port_id);
|
|
+ $out .= "err_type=$err_type, " if ($err_type);
|
|
+ $out .= "pcie_info=$pcie_info, " if ($pcie_info);
|
|
+ $out .= "err_severity=$err_severity, " if ($err_severity);
|
|
+ $out .= "Error Registers: $regs" if ($regs);
|
|
+ $out .= "\n\n";
|
|
}
|
|
if ($out ne "") {
|
|
print "HiSilicon Kunpeng9xx common error events:\n$out\n";
|
|
--
|
|
2.25.1
|
|
|