Update with the latest patches for the following: 1. CPU online fault isolation for the ARM event. 2. Modify the recording of HiSilicon common error data in rasdaemon. 3. In ras-mc-ctl: 3.1. Improve the HiSilicon common error statistics. 3.2. Add support to display the HiSilicon vendor errors for a specified module. 3.3. Print the usage if the necessary parameters are not passed for the HiSilicon vendor-errors options. 3.4. Reformat the error info of the HiSilicon Kunpeng920. 3.5. Relocate reading and displaying of Kunpeng920 errors to under Kunpeng9xx. 3.6. Update the HiSilicon platform name to KunPeng9xx. 4. Fix a memory out-of-bounds issue in rasdaemon. Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
275 lines
13 KiB
Diff
275 lines
13 KiB
Diff
From 0643011831e5fb4e81edff16ad55f9a5196ec7a9 Mon Sep 17 00:00:00 2001
|
|
From: Shiju Jose <shiju.jose@huawei.com>
|
|
Date: Sat, 5 Mar 2022 18:19:38 +0000
|
|
Subject: [PATCH 07/10] rasdaemon: ras-mc-ctl: Add support to display the
|
|
HiSilicon vendor errors for a specified module
|
|
|
|
Add support to display the HiSilicon vendor errors for a specified module.
|
|
|
|
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
|
|
---
|
|
util/ras-mc-ctl.in | 145 +++++++++++++++++++++++++++------------------
|
|
1 file changed, 87 insertions(+), 58 deletions(-)
|
|
|
|
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in
|
|
index 959ea6b..296eb87 100755
|
|
--- a/util/ras-mc-ctl.in
|
|
+++ b/util/ras-mc-ctl.in
|
|
@@ -96,8 +96,9 @@ Usage: $prog [OPTIONS...]
|
|
--errors Shows the errors stored at the error database.
|
|
--error-count Shows the corrected and uncorrected error counts using sysfs.
|
|
--vendor-errors-summary <platform-id> Presents a summary of the vendor-specific logged errors.
|
|
- --vendor-errors <platform-id> Shows the vendor-specific errors stored in the error database.
|
|
- --vendor-platforms Shows the supported platforms with platform-ids for the vendor-specific errors.
|
|
+ --vendor-errors <platform-id> Shows the vendor-specific errors stored in the error database.
|
|
+ --vendor-errors <platform-id> <module-name> Shows the vendor-specific errors for a specific module stored in the error database.
|
|
+ --vendor-platforms List the supported platforms with platform-ids for the vendor-specific errors.
|
|
--help This help message.
|
|
EOF
|
|
|
|
@@ -1535,12 +1536,14 @@ use constant {
|
|
sub vendor_errors_summary
|
|
{
|
|
require DBI;
|
|
- my ($num_args, $platform_id);
|
|
+ my ($num_args, $platform_id, $found_platform);
|
|
my ($query, $query_handle, $count, $out);
|
|
my ($module_id, $sub_module_id, $err_severity, $err_sev);
|
|
|
|
$num_args = $#ARGV + 1;
|
|
$platform_id = 0;
|
|
+ $found_platform = 0;
|
|
+
|
|
if ($num_args ne 0) {
|
|
$platform_id = $ARGV[0];
|
|
} else {
|
|
@@ -1552,6 +1555,7 @@ sub vendor_errors_summary
|
|
|
|
# HiSilicon Kunpeng920 errors
|
|
if ($platform_id eq HISILICON_KUNPENG_920) {
|
|
+ $found_platform = 1;
|
|
$query = "select err_severity, module_id, count(*) from hip08_oem_type1_event_v2 group by err_severity, module_id";
|
|
$query_handle = $dbh->prepare($query);
|
|
$query_handle->execute();
|
|
@@ -1615,6 +1619,7 @@ sub vendor_errors_summary
|
|
|
|
# HiSilicon Kunpeng9xx common errors
|
|
if ($platform_id eq HISILICON_KUNPENG_9XX) {
|
|
+ $found_platform = 1;
|
|
$query = "select err_severity, module_id, count(*) from hisi_common_section_v2 group by err_severity, module_id";
|
|
$query_handle = $dbh->prepare($query);
|
|
$query_handle->execute();
|
|
@@ -1636,21 +1641,31 @@ sub vendor_errors_summary
|
|
$query_handle->finish;
|
|
}
|
|
|
|
+ if ($platform_id && !($found_platform)) {
|
|
+ print "Platform ID $platform_id is not valid\n";
|
|
+ }
|
|
+
|
|
undef($dbh);
|
|
}
|
|
|
|
sub vendor_errors
|
|
{
|
|
require DBI;
|
|
- my ($num_args, $platform_id);
|
|
+ my ($num_args, $platform_id, $found_platform, $module, $found_module);
|
|
my ($query, $query_handle, $id, $timestamp, $out);
|
|
my ($version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $core_id, $port_id);
|
|
my ($module_id, $sub_module_id, $err_severity, $err_type, $pcie_info, $regs);
|
|
|
|
$num_args = $#ARGV + 1;
|
|
$platform_id = 0;
|
|
+ $found_platform = 0;
|
|
+ $module = 0;
|
|
+ $found_module = 0;
|
|
if ($num_args ne 0) {
|
|
$platform_id = $ARGV[0];
|
|
+ if ($num_args gt 1) {
|
|
+ $module = $ARGV[1];
|
|
+ }
|
|
} else {
|
|
usage(1);
|
|
return;
|
|
@@ -1660,27 +1675,29 @@ sub vendor_errors
|
|
|
|
# HiSilicon Kunpeng920 errors
|
|
if ($platform_id eq HISILICON_KUNPENG_920) {
|
|
+ $found_platform = 1;
|
|
$query = "select id, timestamp, version, soc_id, socket_id, nimbus_id, module_id, sub_module_id, err_severity, regs_dump from hip08_oem_type1_event_v2 order by id, module_id, err_severity";
|
|
$query_handle = $dbh->prepare($query);
|
|
$query_handle->execute();
|
|
$query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $nimbus_id, $module_id, $sub_module_id, $err_severity, $regs));
|
|
$out = "";
|
|
while($query_handle->fetch()) {
|
|
- $out .= "$id. $timestamp Error Info: ";
|
|
- $out .= "version=$version, ";
|
|
- $out .= "soc_id=$soc_id, " if ($soc_id);
|
|
- $out .= "socket_id=$socket_id, " if ($socket_id);
|
|
- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
|
|
- $out .= "module_id=$module_id, " if ($module_id);
|
|
- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
|
|
- $out .= "err_severity=$err_severity, " if ($err_severity);
|
|
- $out .= "Error Registers: $regs " if ($regs);
|
|
- $out .= "\n\n";
|
|
+ if ($module eq 0 || ($module_id && uc($module) eq uc($module_id))) {
|
|
+ $out .= "$id. $timestamp Error Info: ";
|
|
+ $out .= "version=$version, ";
|
|
+ $out .= "soc_id=$soc_id, " if ($soc_id);
|
|
+ $out .= "socket_id=$socket_id, " if ($socket_id);
|
|
+ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
|
|
+ $out .= "module_id=$module_id, " if ($module_id);
|
|
+ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
|
|
+ $out .= "err_severity=$err_severity, " if ($err_severity);
|
|
+ $out .= "Error Registers: $regs " if ($regs);
|
|
+ $out .= "\n\n";
|
|
+ $found_module = 1;
|
|
+ }
|
|
}
|
|
if ($out ne "") {
|
|
print "HiSilicon Kunpeng920 OEM type1 error events:\n$out\n";
|
|
- } else {
|
|
- print "No HiSilicon Kunpeng920 OEM type1 errors.\n";
|
|
}
|
|
$query_handle->finish;
|
|
|
|
@@ -1690,21 +1707,22 @@ sub vendor_errors
|
|
$query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $nimbus_id, $module_id, $sub_module_id, $err_severity, $regs));
|
|
$out = "";
|
|
while($query_handle->fetch()) {
|
|
- $out .= "$id. $timestamp Error Info: ";
|
|
- $out .= "version=$version, ";
|
|
- $out .= "soc_id=$soc_id, " if ($soc_id);
|
|
- $out .= "socket_id=$socket_id, " if ($socket_id);
|
|
- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
|
|
- $out .= "module_id=$module_id, " if ($module_id);
|
|
- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
|
|
- $out .= "err_severity=$err_severity, " if ($err_severity);
|
|
- $out .= "Error Registers: $regs " if ($regs);
|
|
- $out .= "\n\n";
|
|
+ if ($module eq 0 || ($module_id && uc($module) eq uc($module_id))) {
|
|
+ $out .= "$id. $timestamp Error Info: ";
|
|
+ $out .= "version=$version, ";
|
|
+ $out .= "soc_id=$soc_id, " if ($soc_id);
|
|
+ $out .= "socket_id=$socket_id, " if ($socket_id);
|
|
+ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
|
|
+ $out .= "module_id=$module_id, " if ($module_id);
|
|
+ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
|
|
+ $out .= "err_severity=$err_severity, " if ($err_severity);
|
|
+ $out .= "Error Registers: $regs " if ($regs);
|
|
+ $out .= "\n\n";
|
|
+ $found_module = 1;
|
|
+ }
|
|
}
|
|
if ($out ne "") {
|
|
print "HiSilicon Kunpeng920 OEM type2 error events:\n$out\n";
|
|
- } else {
|
|
- print "No HiSilicon Kunpeng920 OEM type2 errors.\n";
|
|
}
|
|
$query_handle->finish;
|
|
|
|
@@ -1714,51 +1732,56 @@ sub vendor_errors
|
|
$query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $nimbus_id, $sub_module_id, $core_id, $port_id, $err_severity, $err_type, $regs));
|
|
$out = "";
|
|
while($query_handle->fetch()) {
|
|
- $out .= "$id. $timestamp Error Info: ";
|
|
- $out .= "version=$version, ";
|
|
- $out .= "soc_id=$soc_id, " if ($soc_id);
|
|
- $out .= "socket_id=$socket_id, " if ($socket_id);
|
|
- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
|
|
- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
|
|
- $out .= "core_id=$core_id, " if ($core_id);
|
|
- $out .= "port_id=$port_id, " if ($port_id);
|
|
- $out .= "err_severity=$err_severity, " if ($err_severity);
|
|
- $out .= "err_type=$err_type, " if ($err_type);
|
|
- $out .= "Error Registers: $regs " if ($regs);
|
|
- $out .= "\n\n";
|
|
+ if ($module eq 0 || ($sub_module_id && uc($module) eq uc($sub_module_id))) {
|
|
+ $out .= "$id. $timestamp Error Info: ";
|
|
+ $out .= "version=$version, ";
|
|
+ $out .= "soc_id=$soc_id, " if ($soc_id);
|
|
+ $out .= "socket_id=$socket_id, " if ($socket_id);
|
|
+ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
|
|
+ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
|
|
+ $out .= "core_id=$core_id, " if ($core_id);
|
|
+ $out .= "port_id=$port_id, " if ($port_id);
|
|
+ $out .= "err_severity=$err_severity, " if ($err_severity);
|
|
+ $out .= "err_type=$err_type, " if ($err_type);
|
|
+ $out .= "Error Registers: $regs " if ($regs);
|
|
+ $out .= "\n\n";
|
|
+ $found_module = 1;
|
|
+ }
|
|
}
|
|
if ($out ne "") {
|
|
print "HiSilicon Kunpeng920 PCIe controller error events:\n$out\n";
|
|
- } else {
|
|
- print "No HiSilicon Kunpeng920 PCIe controller errors.\n";
|
|
}
|
|
$query_handle->finish;
|
|
}
|
|
|
|
# HiSilicon Kunpeng9xx common errors
|
|
if ($platform_id eq HISILICON_KUNPENG_9XX) {
|
|
+ $found_platform = 1;
|
|
$query = "select id, timestamp, version, soc_id, socket_id, totem_id, nimbus_id, sub_system_id, module_id, sub_module_id, core_id, port_id, err_type, pcie_info, err_severity, regs_dump from hisi_common_section_v2 order by id, module_id, err_severity";
|
|
$query_handle = $dbh->prepare($query);
|
|
$query_handle->execute();
|
|
$query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $module_id, $sub_module_id, $core_id, $port_id, $err_type, $pcie_info, $err_severity, $regs));
|
|
$out = "";
|
|
while($query_handle->fetch()) {
|
|
- $out .= "$id. $timestamp Error Info: ";
|
|
- $out .= "version=$version, ";
|
|
- $out .= "soc_id=$soc_id, " if ($soc_id);
|
|
- $out .= "socket_id=$socket_id, " if ($socket_id);
|
|
- $out .= "totem_id=$totem_id, " if ($totem_id);
|
|
- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
|
|
- $out .= "sub_system_id=$sub_system_id, " if ($sub_system_id);
|
|
- $out .= "module_id=$module_id, " if ($module_id);
|
|
- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
|
|
- $out .= "core_id=$core_id, " if ($core_id);
|
|
- $out .= "port_id=$port_id, " if ($port_id);
|
|
- $out .= "err_type=$err_type, " if ($err_type);
|
|
- $out .= "pcie_info=$pcie_info, " if ($pcie_info);
|
|
- $out .= "err_severity=$err_severity, " if ($err_severity);
|
|
- $out .= "Error Registers: $regs" if ($regs);
|
|
- $out .= "\n\n";
|
|
+ if ($module eq 0 || ($module_id && uc($module) eq uc($module_id))) {
|
|
+ $out .= "$id. $timestamp Error Info: ";
|
|
+ $out .= "version=$version, ";
|
|
+ $out .= "soc_id=$soc_id, " if ($soc_id);
|
|
+ $out .= "socket_id=$socket_id, " if ($socket_id);
|
|
+ $out .= "totem_id=$totem_id, " if ($totem_id);
|
|
+ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id);
|
|
+ $out .= "sub_system_id=$sub_system_id, " if ($sub_system_id);
|
|
+ $out .= "module_id=$module_id, " if ($module_id);
|
|
+ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id);
|
|
+ $out .= "core_id=$core_id, " if ($core_id);
|
|
+ $out .= "port_id=$port_id, " if ($port_id);
|
|
+ $out .= "err_type=$err_type, " if ($err_type);
|
|
+ $out .= "pcie_info=$pcie_info, " if ($pcie_info);
|
|
+ $out .= "err_severity=$err_severity, " if ($err_severity);
|
|
+ $out .= "Error Registers: $regs" if ($regs);
|
|
+ $out .= "\n\n";
|
|
+ $found_module = 1;
|
|
+ }
|
|
}
|
|
if ($out ne "") {
|
|
print "HiSilicon Kunpeng9xx common error events:\n$out\n";
|
|
@@ -1768,6 +1791,12 @@ sub vendor_errors
|
|
$query_handle->finish;
|
|
}
|
|
|
|
+ if ($platform_id && !($found_platform)) {
|
|
+ print "Platform ID $platform_id is not valid\n";
|
|
+ } elsif ($module && !($found_module)) {
|
|
+ print "No error record for the module $module\n";
|
|
+ }
|
|
+
|
|
undef($dbh);
|
|
}
|
|
|
|
--
|
|
2.25.1
|
|
|