- fix ksliprobe occasionally getting invalid args at startup - fix error print when starting gala-gopher - add system_uuid field to distinguish clients when posting to the pyroscope server - fix CPU spike caused by stackprobe - add support for pyroscope - bugfix: add check if threshold is 0 - fix stackprobe memory allocation and deallocation errors - normalize time format in flamegraph svg filename (cherry picked from commit 6aef5cc8e4e2a34324c3f01663d2b61c0462f4ac)
210 lines
11 KiB
Diff
210 lines
11 KiB
Diff
From cffdb869f03aa454da35d06ad7baf4f2f330b9a0 Mon Sep 17 00:00:00 2001
|
||
From: zhaoyuxing <zhaoyuxing2@huawei.com>
|
||
Date: Tue, 27 Dec 2022 11:06:14 +0800
|
||
Subject: [PATCH] bugfix: add check if threshold is 0
|
||
|
||
---
|
||
.../ebpf.probe/src/taskprobe/proc_probe.c | 2 +-
|
||
src/probes/system_infos.probe/system_cpu.c | 2 +-
|
||
src/probes/system_infos.probe/system_disk.c | 8 +--
|
||
.../system_infos.probe/system_meminfo.c | 4 +-
|
||
src/probes/system_infos.probe/system_net.c | 53 +++++++++++--------
|
||
5 files changed, 38 insertions(+), 31 deletions(-)
|
||
|
||
diff --git a/src/probes/extends/ebpf.probe/src/taskprobe/proc_probe.c b/src/probes/extends/ebpf.probe/src/taskprobe/proc_probe.c
|
||
index 0374c32..40ddff1 100644
|
||
--- a/src/probes/extends/ebpf.probe/src/taskprobe/proc_probe.c
|
||
+++ b/src/probes/extends/ebpf.probe/src/taskprobe/proc_probe.c
|
||
@@ -153,7 +153,7 @@ static void report_proc_metrics(struct proc_data_s *proc)
|
||
proc->dns_op.gethostname_failed);
|
||
}
|
||
|
||
- if (proc->proc_io.iowait_us > latency_thr_us) {
|
||
+ if (latency_thr_us > 0 && proc->proc_io.iowait_us > latency_thr_us) {
|
||
report_logs(OO_NAME,
|
||
entityId,
|
||
"iowait_us",
|
||
diff --git a/src/probes/system_infos.probe/system_cpu.c b/src/probes/system_infos.probe/system_cpu.c
|
||
index f433ac4..0582d43 100644
|
||
--- a/src/probes/system_infos.probe/system_cpu.c
|
||
+++ b/src/probes/system_infos.probe/system_cpu.c
|
||
@@ -85,7 +85,7 @@ static void report_cpu_status(struct probe_params *params)
|
||
entityId[0] = 0;
|
||
(void)strcpy(entityId, "cpu");
|
||
|
||
- if (util_per > params->res_percent_upper) {
|
||
+ if (params->res_percent_upper > 0 && util_per > params->res_percent_upper) {
|
||
report_logs(ENTITY_NAME,
|
||
entityId,
|
||
"total_used_per",
|
||
diff --git a/src/probes/system_infos.probe/system_disk.c b/src/probes/system_infos.probe/system_disk.c
|
||
index b249dbf..84065f9 100644
|
||
--- a/src/probes/system_infos.probe/system_disk.c
|
||
+++ b/src/probes/system_infos.probe/system_disk.c
|
||
@@ -54,7 +54,7 @@ static void report_disk_status(df_stats inode_stats, df_stats blk_stats, struct
|
||
|
||
entityid[0] = 0;
|
||
|
||
- if (inode_stats.inode_or_blk_used_per > params->res_percent_upper) {
|
||
+ if (params->res_percent_upper > 0 && inode_stats.inode_or_blk_used_per > params->res_percent_upper) {
|
||
(void)strncpy(entityid, inode_stats.mount_on, LINE_BUF_LEN - 1);
|
||
report_logs(ENTITY_FS_NAME,
|
||
entityid,
|
||
@@ -63,7 +63,7 @@ static void report_disk_status(df_stats inode_stats, df_stats blk_stats, struct
|
||
"Too many Inodes consumed(%d%%).",
|
||
inode_stats.inode_or_blk_used_per);
|
||
}
|
||
- if (blk_stats.inode_or_blk_used_per > params->res_percent_upper) {
|
||
+ if (params->res_percent_upper > 0 && blk_stats.inode_or_blk_used_per > params->res_percent_upper) {
|
||
if (entityid[0] == 0) {
|
||
(void)strncpy(entityid, blk_stats.mount_on, LINE_BUF_LEN - 1);
|
||
}
|
||
@@ -212,7 +212,7 @@ static void report_disk_iostat(const char *disk_name, disk_io_stats *io_info, st
|
||
|
||
entityid[0] = 0;
|
||
|
||
- if (io_info->util > params->res_percent_upper) {
|
||
+ if (params->res_percent_upper > 0 && io_info->util > params->res_percent_upper) {
|
||
(void)strncpy(entityid, disk_name, LINE_BUF_LEN - 1);
|
||
report_logs(ENTITY_DISK_NAME,
|
||
entityid,
|
||
@@ -335,4 +335,4 @@ void system_iostat_destroy(void)
|
||
g_disk_stats = NULL;
|
||
}
|
||
return;
|
||
-}
|
||
\ No newline at end of file
|
||
+}
|
||
diff --git a/src/probes/system_infos.probe/system_meminfo.c b/src/probes/system_infos.probe/system_meminfo.c
|
||
index 5b6ba2d..ceb3815 100644
|
||
--- a/src/probes/system_infos.probe/system_meminfo.c
|
||
+++ b/src/probes/system_infos.probe/system_meminfo.c
|
||
@@ -85,7 +85,7 @@ static void report_meminfo_status(struct probe_params *params, double mem_util,
|
||
(void)strcpy(entityId, "/proc/meminfo");
|
||
(void)strcpy(entityName, "mem");
|
||
// mem util
|
||
- if (mem_util > params->res_percent_upper) {
|
||
+ if (params->res_percent_upper > 0 && mem_util > params->res_percent_upper) {
|
||
report_logs(entityName,
|
||
entityId,
|
||
"util",
|
||
@@ -94,7 +94,7 @@ static void report_meminfo_status(struct probe_params *params, double mem_util,
|
||
mem_util);
|
||
}
|
||
// swap util
|
||
- if (swap_util > params->res_percent_upper) {
|
||
+ if (params->res_percent_upper > 0 && swap_util > params->res_percent_upper) {
|
||
report_logs(entityName,
|
||
entityId,
|
||
"swap_util",
|
||
diff --git a/src/probes/system_infos.probe/system_net.c b/src/probes/system_infos.probe/system_net.c
|
||
index a096464..525aa54 100644
|
||
--- a/src/probes/system_infos.probe/system_net.c
|
||
+++ b/src/probes/system_infos.probe/system_net.c
|
||
@@ -1,4 +1,4 @@
|
||
-/******************************************************************************
|
||
+/******************************************************************************
|
||
* Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
|
||
* gala-gopher licensed under the Mulan PSL v2.
|
||
* You can use this software according to the terms and conditions of the Mulan PSL v2.
|
||
@@ -101,16 +101,19 @@ int system_tcp_probe(void)
|
||
METRICS_TCP_NAME,
|
||
"/proc/dev/snmp",
|
||
g_snmp_stats.tcp_curr_estab,
|
||
- g_snmp_stats.tcp_in_segs - temp.tcp_in_segs,
|
||
- g_snmp_stats.tcp_out_segs - temp.tcp_out_segs,
|
||
- g_snmp_stats.tcp_retrans_segs - temp.tcp_retrans_segs,
|
||
- g_snmp_stats.tcp_in_errs - temp.tcp_in_errs);
|
||
+ (g_snmp_stats.tcp_in_segs > temp.tcp_in_segs) ? (g_snmp_stats.tcp_in_segs - temp.tcp_in_segs) : 0,
|
||
+ (g_snmp_stats.tcp_out_segs > temp.tcp_out_segs) ? (g_snmp_stats.tcp_out_segs - temp.tcp_out_segs) : 0,
|
||
+ (g_snmp_stats.tcp_retrans_segs > temp.tcp_retrans_segs) ?
|
||
+ (g_snmp_stats.tcp_retrans_segs - temp.tcp_retrans_segs) : 0,
|
||
+ (g_snmp_stats.tcp_in_errs > temp.tcp_in_errs) ? (g_snmp_stats.tcp_in_errs - temp.tcp_in_errs) : 0);
|
||
|
||
(void)nprobe_fprintf(stdout, "|%s|%s|%llu|%llu|\n",
|
||
METRICS_UDP_NAME,
|
||
"/proc/dev/snmp",
|
||
- g_snmp_stats.udp_in_datagrams - temp.udp_in_datagrams,
|
||
- g_snmp_stats.udp_out_datagrams - temp.udp_out_datagrams);
|
||
+ (g_snmp_stats.udp_in_datagrams > temp.udp_in_datagrams) ?
|
||
+ (g_snmp_stats.udp_in_datagrams - temp.udp_in_datagrams) : 0,
|
||
+ (g_snmp_stats.udp_out_datagrams > temp.udp_out_datagrams) ?
|
||
+ (g_snmp_stats.udp_out_datagrams - temp.udp_out_datagrams) : 0);
|
||
|
||
(void)fclose(f);
|
||
return 0;
|
||
@@ -278,7 +281,7 @@ static void report_netdev(net_dev_stat *new_info, net_dev_stat *old_info, struct
|
||
tx_errs = new_info->tx_errs - old_info->tx_errs;
|
||
rx_errs = new_info->rx_errs - old_info->rx_errs;
|
||
|
||
- if (tx_drops > params->drops_count_thr) {
|
||
+ if (params->drops_count_thr > 0 && tx_drops > params->drops_count_thr) {
|
||
(void)strncpy(entityid, new_info->dev_name, LINE_BUF_LEN - 1);
|
||
report_logs(ENTITY_NIC_NAME,
|
||
entityid,
|
||
@@ -287,7 +290,7 @@ static void report_netdev(net_dev_stat *new_info, net_dev_stat *old_info, struct
|
||
"net device tx queue drops(%llu).",
|
||
tx_drops);
|
||
}
|
||
- if (rx_drops > params->drops_count_thr) {
|
||
+ if (params->drops_count_thr > 0 && rx_drops > params->drops_count_thr) {
|
||
if (entityid[0] == 0) {
|
||
(void)strncpy(entityid, new_info->dev_name, LINE_BUF_LEN - 1);
|
||
}
|
||
@@ -298,7 +301,7 @@ static void report_netdev(net_dev_stat *new_info, net_dev_stat *old_info, struct
|
||
"net device rx queue drops(%llu).",
|
||
rx_drops);
|
||
}
|
||
- if (tx_errs > params->drops_count_thr) {
|
||
+ if (params->drops_count_thr > 0 && tx_errs > params->drops_count_thr) {
|
||
if (entityid[0] == 0) {
|
||
(void)strncpy(entityid, new_info->dev_name, LINE_BUF_LEN - 1);
|
||
}
|
||
@@ -309,7 +312,7 @@ static void report_netdev(net_dev_stat *new_info, net_dev_stat *old_info, struct
|
||
"net device tx queue errors(%llu).",
|
||
tx_errs);
|
||
}
|
||
- if (rx_errs > params->drops_count_thr) {
|
||
+ if (params->drops_count_thr > 0 && rx_errs > params->drops_count_thr) {
|
||
if (entityid[0] == 0) {
|
||
(void)strncpy(entityid, new_info->dev_name, LINE_BUF_LEN - 1);
|
||
}
|
||
@@ -375,18 +378,22 @@ int system_net_probe(struct probe_params *params)
|
||
METRICS_NIC_NAME,
|
||
g_dev_stats[index].dev_name,
|
||
g_dev_stats[index].net_status == 1 ? "UP" : "DOWN",
|
||
- g_dev_stats[index].rx_bytes - temp.rx_bytes,
|
||
- g_dev_stats[index].rx_packets - temp.rx_packets,
|
||
- g_dev_stats[index].rx_errs - temp.rx_errs,
|
||
- g_dev_stats[index].rx_dropped - temp.rx_dropped,
|
||
- g_dev_stats[index].tx_bytes - temp.tx_bytes,
|
||
- g_dev_stats[index].tx_packets - temp.tx_packets,
|
||
- g_dev_stats[index].tx_errs - temp.tx_errs,
|
||
- g_dev_stats[index].tx_dropped - temp.tx_dropped,
|
||
- SPEED_VALUE(temp.rx_bytes, g_dev_stats[index].rx_bytes, params->period),
|
||
- SPEED_VALUE(temp.tx_bytes, g_dev_stats[index].tx_bytes, params->period),
|
||
- g_dev_stats[index].tc_sent_drop_count - temp.tc_sent_drop_count,
|
||
- g_dev_stats[index].tc_sent_overlimits_count - temp.tc_sent_overlimits_count,
|
||
+ (g_dev_stats[index].rx_bytes > temp.rx_bytes) ? (g_dev_stats[index].rx_bytes - temp.rx_bytes) : 0,
|
||
+ (g_dev_stats[index].rx_packets > temp.rx_packets) ? (g_dev_stats[index].rx_packets - temp.rx_packets) : 0,
|
||
+ (g_dev_stats[index].rx_errs > temp.rx_errs) ? (g_dev_stats[index].rx_errs - temp.rx_errs) : 0,
|
||
+ (g_dev_stats[index].rx_dropped > temp.rx_dropped) ? (g_dev_stats[index].rx_dropped - temp.rx_dropped) : 0,
|
||
+ (g_dev_stats[index].tx_bytes > temp.tx_bytes) ? (g_dev_stats[index].tx_bytes - temp.tx_bytes) : 0,
|
||
+ (g_dev_stats[index].tx_packets > temp.tx_packets) ? (g_dev_stats[index].tx_packets - temp.tx_packets) : 0,
|
||
+ (g_dev_stats[index].tx_errs > temp.tx_errs) ? (g_dev_stats[index].tx_errs - temp.tx_errs) : 0,
|
||
+ (g_dev_stats[index].tx_dropped > temp.tx_dropped) ? (g_dev_stats[index].tx_dropped - temp.tx_dropped) : 0,
|
||
+ (g_dev_stats[index].rx_bytes > temp.rx_bytes) ?
|
||
+ SPEED_VALUE(temp.rx_bytes, g_dev_stats[index].rx_bytes, params->period) : 0,
|
||
+ (g_dev_stats[index].tx_bytes > temp.tx_bytes) ?
|
||
+ SPEED_VALUE(temp.tx_bytes, g_dev_stats[index].tx_bytes, params->period) : 0,
|
||
+ (g_dev_stats[index].tc_sent_drop_count > temp.tc_sent_drop_count) ?
|
||
+ (g_dev_stats[index].tc_sent_drop_count - temp.tc_sent_drop_count) : 0,
|
||
+ (g_dev_stats[index].tc_sent_overlimits_count > temp.tc_sent_overlimits_count) ?
|
||
+ (g_dev_stats[index].tc_sent_overlimits_count - temp.tc_sent_overlimits_count) : 0,
|
||
g_dev_stats[index].tc_backlog_count,
|
||
g_dev_stats[index].tc_ecn_mark);
|
||
/* output event */
|
||
--
|
||
2.33.0
|
||
|