rasdaemon/0007-rasdaemon-ras-mc-ctl-Add-support-to-display-the-HiSi.patch
Shiju Jose f9eb9d8c57 rasdaemon: Update with the latest patches for the CPU fault isolation, Hisilicon Kunpeng9xx common error records and improvements in the ras-mc-ctl for the Hisilicon Kunpeng9xx errors
Update with the latest patches for the following:
1. CPU online fault isolation for arm event.
2. Modify the recording of HiSilicon common error data in the rasdaemon.
3. In the ras-mc-ctl,
3.1. Improve Hisilicon common error statistics.
3.2. Add support to display the HiSilicon vendor-errors for a specified module.
3.3. Print the usage message if the necessary parameters are not passed for the HiSilicon vendor-errors options.
3.4. Reformat error info of the HiSilicon Kunpeng920.
3.5. Relocate the reading and display of Kunpeng920 errors under Kunpeng9xx.
3.6. Update the HiSilicon platform name to KunPeng9xx.
4. Fix a memory out-of-bounds issue in the rasdaemon.

Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
2022-05-30 09:30:03 +01:00

275 lines
13 KiB
Diff

From 0643011831e5fb4e81edff16ad55f9a5196ec7a9 Mon Sep 17 00:00:00 2001
From: Shiju Jose <shiju.jose@huawei.com>
Date: Sat, 5 Mar 2022 18:19:38 +0000
Subject: [PATCH 07/10] rasdaemon: ras-mc-ctl: Add support to display the
HiSilicon vendor errors for a specified module
Add support to display the HiSilicon vendor errors for a specified module.
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
util/ras-mc-ctl.in | 145 +++++++++++++++++++++++++++------------------
1 file changed, 87 insertions(+), 58 deletions(-)
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in
index 959ea6b..296eb87 100755
--- a/util/ras-mc-ctl.in
+++ b/util/ras-mc-ctl.in
@@ -96,8 +96,9 @@ Usage: $prog [OPTIONS...]
--errors Shows the errors stored at the error database.
--error-count Shows the corrected and uncorrected error counts using sysfs.
--vendor-errors-summary <platform-id> Presents a summary of the vendor-specific logged errors.
- --vendor-errors <platform-id> Shows the vendor-specific errors stored in the error database.
- --vendor-platforms Shows the supported platforms with platform-ids for the vendor-specific errors.
+ --vendor-errors <platform-id> Shows the vendor-specific errors stored in the error database.
+ --vendor-errors <platform-id> <module-name> Shows the vendor-specific errors for a specific module stored in the error database.
+ --vendor-platforms List the supported platforms with platform-ids for the vendor-specific errors.
--help This help message.
EOF
@@ -1535,12 +1536,14 @@ use constant {
sub vendor_errors_summary
{
require DBI;
- my ($num_args, $platform_id);
+ my ($num_args, $platform_id, $found_platform);
my ($query, $query_handle, $count, $out);
my ($module_id, $sub_module_id, $err_severity, $err_sev);
$num_args = $#ARGV + 1;
$platform_id = 0;
+ $found_platform = 0;
+
if ($num_args ne 0) {
$platform_id = $ARGV[0];
} else {
@@ -1552,6 +1555,7 @@ sub vendor_errors_summary
# HiSilicon Kunpeng920 errors
if ($platform_id eq HISILICON_KUNPENG_920) {
+ $found_platform = 1;
$query = "select err_severity, module_id, count(*) from hip08_oem_type1_event_v2 group by err_severity, module_id";
$query_handle = $dbh->prepare($query);
$query_handle->execute();
@@ -1615,6 +1619,7 @@ sub vendor_errors_summary
# HiSilicon Kunpeng9xx common errors
if ($platform_id eq HISILICON_KUNPENG_9XX) {
+ $found_platform = 1;
$query = "select err_severity, module_id, count(*) from hisi_common_section_v2 group by err_severity, module_id";
$query_handle = $dbh->prepare($query);
$query_handle->execute();
@@ -1636,21 +1641,31 @@ sub vendor_errors_summary
$query_handle->finish;
}
+ if ($platform_id && !($found_platform)) {
+ print "Platform ID $platform_id is not valid\n";
+ }
+
undef($dbh);
}
sub vendor_errors
{
require DBI;
- my ($num_args, $platform_id);
+ my ($num_args, $platform_id, $found_platform, $module, $found_module);
my ($query, $query_handle, $id, $timestamp, $out);
my ($version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $core_id, $port_id);
my ($module_id, $sub_module_id, $err_severity, $err_type, $pcie_info, $regs);
$num_args = $#ARGV + 1;
$platform_id = 0;
+ $found_platform = 0;
+ $module = 0;
+ $found_module = 0;
if ($num_args ne 0) {
$platform_id = $ARGV[0];
+ if ($num_args gt 1) {
+ $module = $ARGV[1];
+ }
} else {
usage(1);
return;
@@ -1660,27 +1675,29 @@ sub vendor_errors
# HiSilicon Kunpeng920 errors
if ($platform_id eq HISILICON_KUNPENG_920) {
+ $found_platform = 1;
$query = "select id, timestamp, version, soc_id, socket_id, nimbus_id, module_id, sub_module_id, err_severity, regs_dump from hip08_oem_type1_event_v2 order by id, module_id, err_severity";
$query_handle = $dbh->prepare($query);
$query_handle->execute();
$query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $nimbus_id, $module_id, $sub_module_id, $err_severity, $regs));
$out = "";
while($query_handle->fetch()) {
- $out .= "$id. $timestamp Error Info: ";
- $out .= "version=$version, ";
- $out .= "soc_id=$soc_id, " if ($soc_id);
- $out .= "socket_id=$socket_id, " if ($socket_id);
- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
- $out .= "module_id=$module_id, " if ($module_id);
- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
- $out .= "err_severity=$err_severity, " if ($err_severity);
- $out .= "Error Registers: $regs " if ($regs);
- $out .= "\n\n";
+ if ($module eq 0 || ($module_id && uc($module) eq uc($module_id))) {
+ $out .= "$id. $timestamp Error Info: ";
+ $out .= "version=$version, ";
+ $out .= "soc_id=$soc_id, " if ($soc_id);
+ $out .= "socket_id=$socket_id, " if ($socket_id);
+ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
+ $out .= "module_id=$module_id, " if ($module_id);
+ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
+ $out .= "err_severity=$err_severity, " if ($err_severity);
+ $out .= "Error Registers: $regs " if ($regs);
+ $out .= "\n\n";
+ $found_module = 1;
+ }
}
if ($out ne "") {
print "HiSilicon Kunpeng920 OEM type1 error events:\n$out\n";
- } else {
- print "No HiSilicon Kunpeng920 OEM type1 errors.\n";
}
$query_handle->finish;
@@ -1690,21 +1707,22 @@ sub vendor_errors
$query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $nimbus_id, $module_id, $sub_module_id, $err_severity, $regs));
$out = "";
while($query_handle->fetch()) {
- $out .= "$id. $timestamp Error Info: ";
- $out .= "version=$version, ";
- $out .= "soc_id=$soc_id, " if ($soc_id);
- $out .= "socket_id=$socket_id, " if ($socket_id);
- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
- $out .= "module_id=$module_id, " if ($module_id);
- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
- $out .= "err_severity=$err_severity, " if ($err_severity);
- $out .= "Error Registers: $regs " if ($regs);
- $out .= "\n\n";
+ if ($module eq 0 || ($module_id && uc($module) eq uc($module_id))) {
+ $out .= "$id. $timestamp Error Info: ";
+ $out .= "version=$version, ";
+ $out .= "soc_id=$soc_id, " if ($soc_id);
+ $out .= "socket_id=$socket_id, " if ($socket_id);
+ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
+ $out .= "module_id=$module_id, " if ($module_id);
+ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
+ $out .= "err_severity=$err_severity, " if ($err_severity);
+ $out .= "Error Registers: $regs " if ($regs);
+ $out .= "\n\n";
+ $found_module = 1;
+ }
}
if ($out ne "") {
print "HiSilicon Kunpeng920 OEM type2 error events:\n$out\n";
- } else {
- print "No HiSilicon Kunpeng920 OEM type2 errors.\n";
}
$query_handle->finish;
@@ -1714,51 +1732,56 @@ sub vendor_errors
$query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $nimbus_id, $sub_module_id, $core_id, $port_id, $err_severity, $err_type, $regs));
$out = "";
while($query_handle->fetch()) {
- $out .= "$id. $timestamp Error Info: ";
- $out .= "version=$version, ";
- $out .= "soc_id=$soc_id, " if ($soc_id);
- $out .= "socket_id=$socket_id, " if ($socket_id);
- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
- $out .= "core_id=$core_id, " if ($core_id);
- $out .= "port_id=$port_id, " if ($port_id);
- $out .= "err_severity=$err_severity, " if ($err_severity);
- $out .= "err_type=$err_type, " if ($err_type);
- $out .= "Error Registers: $regs " if ($regs);
- $out .= "\n\n";
+ if ($module eq 0 || ($sub_module_id && uc($module) eq uc($sub_module_id))) {
+ $out .= "$id. $timestamp Error Info: ";
+ $out .= "version=$version, ";
+ $out .= "soc_id=$soc_id, " if ($soc_id);
+ $out .= "socket_id=$socket_id, " if ($socket_id);
+ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
+ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
+ $out .= "core_id=$core_id, " if ($core_id);
+ $out .= "port_id=$port_id, " if ($port_id);
+ $out .= "err_severity=$err_severity, " if ($err_severity);
+ $out .= "err_type=$err_type, " if ($err_type);
+ $out .= "Error Registers: $regs " if ($regs);
+ $out .= "\n\n";
+ $found_module = 1;
+ }
}
if ($out ne "") {
print "HiSilicon Kunpeng920 PCIe controller error events:\n$out\n";
- } else {
- print "No HiSilicon Kunpeng920 PCIe controller errors.\n";
}
$query_handle->finish;
}
# HiSilicon Kunpeng9xx common errors
if ($platform_id eq HISILICON_KUNPENG_9XX) {
+ $found_platform = 1;
$query = "select id, timestamp, version, soc_id, socket_id, totem_id, nimbus_id, sub_system_id, module_id, sub_module_id, core_id, port_id, err_type, pcie_info, err_severity, regs_dump from hisi_common_section_v2 order by id, module_id, err_severity";
$query_handle = $dbh->prepare($query);
$query_handle->execute();
$query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $module_id, $sub_module_id, $core_id, $port_id, $err_type, $pcie_info, $err_severity, $regs));
$out = "";
while($query_handle->fetch()) {
- $out .= "$id. $timestamp Error Info: ";
- $out .= "version=$version, ";
- $out .= "soc_id=$soc_id, " if ($soc_id);
- $out .= "socket_id=$socket_id, " if ($socket_id);
- $out .= "totem_id=$totem_id, " if ($totem_id);
- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
- $out .= "sub_system_id=$sub_system_id, " if ($sub_system_id);
- $out .= "module_id=$module_id, " if ($module_id);
- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
- $out .= "core_id=$core_id, " if ($core_id);
- $out .= "port_id=$port_id, " if ($port_id);
- $out .= "err_type=$err_type, " if ($err_type);
- $out .= "pcie_info=$pcie_info, " if ($pcie_info);
- $out .= "err_severity=$err_severity, " if ($err_severity);
- $out .= "Error Registers: $regs" if ($regs);
- $out .= "\n\n";
+ if ($module eq 0 || ($module_id && uc($module) eq uc($module_id))) {
+ $out .= "$id. $timestamp Error Info: ";
+ $out .= "version=$version, ";
+ $out .= "soc_id=$soc_id, " if ($soc_id);
+ $out .= "socket_id=$socket_id, " if ($socket_id);
+ $out .= "totem_id=$totem_id, " if ($totem_id);
+ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
+ $out .= "sub_system_id=$sub_system_id, " if ($sub_system_id);
+ $out .= "module_id=$module_id, " if ($module_id);
+ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
+ $out .= "core_id=$core_id, " if ($core_id);
+ $out .= "port_id=$port_id, " if ($port_id);
+ $out .= "err_type=$err_type, " if ($err_type);
+ $out .= "pcie_info=$pcie_info, " if ($pcie_info);
+ $out .= "err_severity=$err_severity, " if ($err_severity);
+ $out .= "Error Registers: $regs" if ($regs);
+ $out .= "\n\n";
+ $found_module = 1;
+ }
}
if ($out ne "") {
print "HiSilicon Kunpeng9xx common error events:\n$out\n";
@@ -1768,6 +1791,12 @@ sub vendor_errors
$query_handle->finish;
}
+ if ($platform_id && !($found_platform)) {
+ print "Platform ID $platform_id is not valid\n";
+ } elsif ($module && !($found_module)) {
+ print "No error record for the module $module\n";
+ }
+
undef($dbh);
}
--
2.25.1