gala-gopher/bugfix-add-check-if-thread-is-0.patch
Zhen Chen 4a36a5936f sync bugfix patches from openeuler/gala-gopher
- fix ksliprobe occasionally getting invalid args at startup
- fix error print when starting gala-gopher
- add system_uuid field to distinguish clients when posting to the pyroscope server
- fix CPU usage spike caused by stackprobe
- add support for pyroscope
- bugfix: add check if threshold is 0
- fix stackprobe memory allocation and deallocation errors
- normalize time format in flamegraph svg filename

(cherry picked from commit 6aef5cc8e4e2a34324c3f01663d2b61c0462f4ac)
2023-01-17 22:29:44 +08:00

210 lines
11 KiB
Diff
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From cffdb869f03aa454da35d06ad7baf4f2f330b9a0 Mon Sep 17 00:00:00 2001
From: zhaoyuxing <zhaoyuxing2@huawei.com>
Date: Tue, 27 Dec 2022 11:06:14 +0800
Subject: [PATCH] bugfix: add check if thread is 0
---
.../ebpf.probe/src/taskprobe/proc_probe.c | 2 +-
src/probes/system_infos.probe/system_cpu.c | 2 +-
src/probes/system_infos.probe/system_disk.c | 8 +--
.../system_infos.probe/system_meminfo.c | 4 +-
src/probes/system_infos.probe/system_net.c | 53 +++++++++++--------
5 files changed, 38 insertions(+), 31 deletions(-)
diff --git a/src/probes/extends/ebpf.probe/src/taskprobe/proc_probe.c b/src/probes/extends/ebpf.probe/src/taskprobe/proc_probe.c
index 0374c32..40ddff1 100644
--- a/src/probes/extends/ebpf.probe/src/taskprobe/proc_probe.c
+++ b/src/probes/extends/ebpf.probe/src/taskprobe/proc_probe.c
@@ -153,7 +153,7 @@ static void report_proc_metrics(struct proc_data_s *proc)
proc->dns_op.gethostname_failed);
}
- if (proc->proc_io.iowait_us > latency_thr_us) {
+ if (latency_thr_us > 0 && proc->proc_io.iowait_us > latency_thr_us) {
report_logs(OO_NAME,
entityId,
"iowait_us",
diff --git a/src/probes/system_infos.probe/system_cpu.c b/src/probes/system_infos.probe/system_cpu.c
index f433ac4..0582d43 100644
--- a/src/probes/system_infos.probe/system_cpu.c
+++ b/src/probes/system_infos.probe/system_cpu.c
@@ -85,7 +85,7 @@ static void report_cpu_status(struct probe_params *params)
entityId[0] = 0;
(void)strcpy(entityId, "cpu");
- if (util_per > params->res_percent_upper) {
+ if (params->res_percent_upper > 0 && util_per > params->res_percent_upper) {
report_logs(ENTITY_NAME,
entityId,
"total_used_per",
diff --git a/src/probes/system_infos.probe/system_disk.c b/src/probes/system_infos.probe/system_disk.c
index b249dbf..84065f9 100644
--- a/src/probes/system_infos.probe/system_disk.c
+++ b/src/probes/system_infos.probe/system_disk.c
@@ -54,7 +54,7 @@ static void report_disk_status(df_stats inode_stats, df_stats blk_stats, struct
entityid[0] = 0;
- if (inode_stats.inode_or_blk_used_per > params->res_percent_upper) {
+ if (params->res_percent_upper > 0 && inode_stats.inode_or_blk_used_per > params->res_percent_upper) {
(void)strncpy(entityid, inode_stats.mount_on, LINE_BUF_LEN - 1);
report_logs(ENTITY_FS_NAME,
entityid,
@@ -63,7 +63,7 @@ static void report_disk_status(df_stats inode_stats, df_stats blk_stats, struct
"Too many Inodes consumed(%d%%).",
inode_stats.inode_or_blk_used_per);
}
- if (blk_stats.inode_or_blk_used_per > params->res_percent_upper) {
+ if (params->res_percent_upper > 0 && blk_stats.inode_or_blk_used_per > params->res_percent_upper) {
if (entityid[0] == 0) {
(void)strncpy(entityid, blk_stats.mount_on, LINE_BUF_LEN - 1);
}
@@ -212,7 +212,7 @@ static void report_disk_iostat(const char *disk_name, disk_io_stats *io_info, st
entityid[0] = 0;
- if (io_info->util > params->res_percent_upper) {
+ if (params->res_percent_upper > 0 && io_info->util > params->res_percent_upper) {
(void)strncpy(entityid, disk_name, LINE_BUF_LEN - 1);
report_logs(ENTITY_DISK_NAME,
entityid,
@@ -335,4 +335,4 @@ void system_iostat_destroy(void)
g_disk_stats = NULL;
}
return;
-}
\ No newline at end of file
+}
diff --git a/src/probes/system_infos.probe/system_meminfo.c b/src/probes/system_infos.probe/system_meminfo.c
index 5b6ba2d..ceb3815 100644
--- a/src/probes/system_infos.probe/system_meminfo.c
+++ b/src/probes/system_infos.probe/system_meminfo.c
@@ -85,7 +85,7 @@ static void report_meminfo_status(struct probe_params *params, double mem_util,
(void)strcpy(entityId, "/proc/meminfo");
(void)strcpy(entityName, "mem");
// mem util
- if (mem_util > params->res_percent_upper) {
+ if (params->res_percent_upper > 0 && mem_util > params->res_percent_upper) {
report_logs(entityName,
entityId,
"util",
@@ -94,7 +94,7 @@ static void report_meminfo_status(struct probe_params *params, double mem_util,
mem_util);
}
// swap util
- if (swap_util > params->res_percent_upper) {
+ if (params->res_percent_upper > 0 && swap_util > params->res_percent_upper) {
report_logs(entityName,
entityId,
"swap_util",
diff --git a/src/probes/system_infos.probe/system_net.c b/src/probes/system_infos.probe/system_net.c
index a096464..525aa54 100644
--- a/src/probes/system_infos.probe/system_net.c
+++ b/src/probes/system_infos.probe/system_net.c
@@ -1,4 +1,4 @@
-/******************************************************************************
+/******************************************************************************
* Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
* gala-gopher licensed under the Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
@@ -101,16 +101,19 @@ int system_tcp_probe(void)
METRICS_TCP_NAME,
"/proc/dev/snmp",
g_snmp_stats.tcp_curr_estab,
- g_snmp_stats.tcp_in_segs - temp.tcp_in_segs,
- g_snmp_stats.tcp_out_segs - temp.tcp_out_segs,
- g_snmp_stats.tcp_retrans_segs - temp.tcp_retrans_segs,
- g_snmp_stats.tcp_in_errs - temp.tcp_in_errs);
+ (g_snmp_stats.tcp_in_segs > temp.tcp_in_segs) ? (g_snmp_stats.tcp_in_segs - temp.tcp_in_segs) : 0,
+ (g_snmp_stats.tcp_out_segs > temp.tcp_out_segs) ? (g_snmp_stats.tcp_out_segs - temp.tcp_out_segs) : 0,
+ (g_snmp_stats.tcp_retrans_segs > temp.tcp_retrans_segs) ?
+ (g_snmp_stats.tcp_retrans_segs - temp.tcp_retrans_segs) : 0,
+ (g_snmp_stats.tcp_in_errs > temp.tcp_in_errs) ? (g_snmp_stats.tcp_in_errs - temp.tcp_in_errs) : 0);
(void)nprobe_fprintf(stdout, "|%s|%s|%llu|%llu|\n",
METRICS_UDP_NAME,
"/proc/dev/snmp",
- g_snmp_stats.udp_in_datagrams - temp.udp_in_datagrams,
- g_snmp_stats.udp_out_datagrams - temp.udp_out_datagrams);
+ (g_snmp_stats.udp_in_datagrams > temp.udp_in_datagrams) ?
+ (g_snmp_stats.udp_in_datagrams - temp.udp_in_datagrams) : 0,
+ (g_snmp_stats.udp_out_datagrams > temp.udp_out_datagrams) ?
+ (g_snmp_stats.udp_out_datagrams - temp.udp_out_datagrams) : 0);
(void)fclose(f);
return 0;
@@ -278,7 +281,7 @@ static void report_netdev(net_dev_stat *new_info, net_dev_stat *old_info, struct
tx_errs = new_info->tx_errs - old_info->tx_errs;
rx_errs = new_info->rx_errs - old_info->rx_errs;
- if (tx_drops > params->drops_count_thr) {
+ if (params->drops_count_thr > 0 && tx_drops > params->drops_count_thr) {
(void)strncpy(entityid, new_info->dev_name, LINE_BUF_LEN - 1);
report_logs(ENTITY_NIC_NAME,
entityid,
@@ -287,7 +290,7 @@ static void report_netdev(net_dev_stat *new_info, net_dev_stat *old_info, struct
"net device tx queue drops(%llu).",
tx_drops);
}
- if (rx_drops > params->drops_count_thr) {
+ if (params->drops_count_thr > 0 && rx_drops > params->drops_count_thr) {
if (entityid[0] == 0) {
(void)strncpy(entityid, new_info->dev_name, LINE_BUF_LEN - 1);
}
@@ -298,7 +301,7 @@ static void report_netdev(net_dev_stat *new_info, net_dev_stat *old_info, struct
"net device rx queue drops(%llu).",
rx_drops);
}
- if (tx_errs > params->drops_count_thr) {
+ if (params->drops_count_thr > 0 && tx_errs > params->drops_count_thr) {
if (entityid[0] == 0) {
(void)strncpy(entityid, new_info->dev_name, LINE_BUF_LEN - 1);
}
@@ -309,7 +312,7 @@ static void report_netdev(net_dev_stat *new_info, net_dev_stat *old_info, struct
"net device tx queue errors(%llu).",
tx_errs);
}
- if (rx_errs > params->drops_count_thr) {
+ if (params->drops_count_thr > 0 && rx_errs > params->drops_count_thr) {
if (entityid[0] == 0) {
(void)strncpy(entityid, new_info->dev_name, LINE_BUF_LEN - 1);
}
@@ -375,18 +378,22 @@ int system_net_probe(struct probe_params *params)
METRICS_NIC_NAME,
g_dev_stats[index].dev_name,
g_dev_stats[index].net_status == 1 ? "UP" : "DOWN",
- g_dev_stats[index].rx_bytes - temp.rx_bytes,
- g_dev_stats[index].rx_packets - temp.rx_packets,
- g_dev_stats[index].rx_errs - temp.rx_errs,
- g_dev_stats[index].rx_dropped - temp.rx_dropped,
- g_dev_stats[index].tx_bytes - temp.tx_bytes,
- g_dev_stats[index].tx_packets - temp.tx_packets,
- g_dev_stats[index].tx_errs - temp.tx_errs,
- g_dev_stats[index].tx_dropped - temp.tx_dropped,
- SPEED_VALUE(temp.rx_bytes, g_dev_stats[index].rx_bytes, params->period),
- SPEED_VALUE(temp.tx_bytes, g_dev_stats[index].tx_bytes, params->period),
- g_dev_stats[index].tc_sent_drop_count - temp.tc_sent_drop_count,
- g_dev_stats[index].tc_sent_overlimits_count - temp.tc_sent_overlimits_count,
+ (g_dev_stats[index].rx_bytes > temp.rx_bytes) ? (g_dev_stats[index].rx_bytes - temp.rx_bytes) : 0,
+ (g_dev_stats[index].rx_packets > temp.rx_packets) ? (g_dev_stats[index].rx_packets - temp.rx_packets) : 0,
+ (g_dev_stats[index].rx_errs > temp.rx_errs) ? (g_dev_stats[index].rx_errs - temp.rx_errs) : 0,
+ (g_dev_stats[index].rx_dropped > temp.rx_dropped) ? (g_dev_stats[index].rx_dropped - temp.rx_dropped) : 0,
+ (g_dev_stats[index].tx_bytes > temp.tx_bytes) ? (g_dev_stats[index].tx_bytes - temp.tx_bytes) : 0,
+ (g_dev_stats[index].tx_packets > temp.tx_packets) ? (g_dev_stats[index].tx_packets - temp.tx_packets) : 0,
+ (g_dev_stats[index].tx_errs > temp.tx_errs) ? (g_dev_stats[index].tx_errs - temp.tx_errs) : 0,
+ (g_dev_stats[index].tx_dropped > temp.tx_dropped) ? (g_dev_stats[index].tx_dropped - temp.tx_dropped) : 0,
+ (g_dev_stats[index].rx_bytes > temp.rx_bytes) ?
+ SPEED_VALUE(temp.rx_bytes, g_dev_stats[index].rx_bytes, params->period) : 0,
+ (g_dev_stats[index].tx_bytes > temp.tx_bytes) ?
+ SPEED_VALUE(temp.tx_bytes, g_dev_stats[index].tx_bytes, params->period) : 0,
+ (g_dev_stats[index].tc_sent_drop_count > temp.tc_sent_drop_count) ?
+ (g_dev_stats[index].tc_sent_drop_count - temp.tc_sent_drop_count) : 0,
+ (g_dev_stats[index].tc_sent_overlimits_count > temp.tc_sent_overlimits_count) ?
+ (g_dev_stats[index].tc_sent_overlimits_count - temp.tc_sent_overlimits_count) : 0,
g_dev_stats[index].tc_backlog_count,
g_dev_stats[index].tc_ecn_mark);
/* output event */
--
2.33.0