gala-gopher/fix-ksliprobe-get-invalid-args-occasionally-at-start.patch
Zhen Chen 4a36a5936f sync bugfix patches from openeuler/gala-gopher
- fix ksliprobe get invalid args occasionally at startup
- fix error print when starting gala-gopher
- add system_uuid field to distinguish client when post to pyroscope server
- repair stackprobe caused cpu rush
- add support to pyroscope
- bugfix: add check if thread is 0
- fix stackprobe memory allocation and deallocation errors
- normalize time format in flamegraph svg filename

(cherry picked from commit 6aef5cc8e4e2a34324c3f01663d2b61c0462f4ac)
2023-01-17 22:29:44 +08:00

172 lines
7.6 KiB
Diff
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From 6c2753778bcc7e124353a1e21612a1ebdb92f241 Mon Sep 17 00:00:00 2001
From: wo_cow <niuqianqian@huawei.com>
Date: Tue, 10 Jan 2023 22:39:28 +0800
Subject: [PATCH] fix ksliprobe get invalid args occasionally at startup
---
src/common/args.c | 2 +-
src/common/args.h | 2 +-
.../ebpf.probe/src/ksliprobe/ksliprobe.bpf.c | 20 ++++++++++++-------
.../ebpf.probe/src/ksliprobe/ksliprobe.c | 8 ++++----
.../ebpf.probe/src/ksliprobe/ksliprobe.h | 4 ++--
5 files changed, 21 insertions(+), 15 deletions(-)
diff --git a/src/common/args.c b/src/common/args.c
index 6c8e2d1..5c6bee7 100644
--- a/src/common/args.c
+++ b/src/common/args.c
@@ -157,7 +157,7 @@ static int __period_arg_parse(char opt, char *arg, struct probe_params *params)
}
break;
case 'C':
- params->cycle_sampling_flag = 1;
+ params->continuous_sampling_flag = 1;
break;
case 'k':
params->kafka_port = (unsigned int)atoi(arg);
diff --git a/src/common/args.h b/src/common/args.h
index f8ab447..8fd5139 100644
--- a/src/common/args.h
+++ b/src/common/args.h
@@ -44,7 +44,7 @@ struct probe_params {
char res_percent_upper; // [-U <>] Upper limit of resource percentage, default is 0%
char res_percent_lower; // [-L <>] Lower limit of resource percentage, default is 0%
unsigned char cport_flag; // [-c <>] Indicates whether the probes(such as tcp) identifies the client port, default is 0 (no identify)
- char cycle_sampling_flag; // [-C <>] Enables the cycle sampling, default is 0
+ char continuous_sampling_flag; // [-C <>] Enables the continuous sampling, default is 0
char target_dev[DEV_NAME]; // [-d <>] Device name, default is null
char elf_path[MAX_PATH_LEN]; // [-p <>] Set ELF file path of the monitored software, default is null
char task_whitelist[MAX_PATH_LEN]; // [-w <>] Filtering app monitoring ranges, default is null
diff --git a/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.bpf.c b/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.bpf.c
index 6fa64ab..f54df41 100644
--- a/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.bpf.c
+++ b/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.bpf.c
@@ -116,17 +116,17 @@ static __always_inline void get_args(struct conn_data_t *conn_data)
{
u32 key = 0;
u64 period = __PERIOD;
- char cycle_sampling_flag = 0;
+ char continuous_sampling_flag = 0;
struct ksli_args_s *args;
args = (struct ksli_args_s *)bpf_map_lookup_elem(&args_map, &key);
if (args) {
period = args->period;
- cycle_sampling_flag = args->cycle_sampling_flag;
+ continuous_sampling_flag = args->continuous_sampling_flag;
}
conn_data->report_period = period;
- conn_data->cycle_sampling_flag = cycle_sampling_flag;
+ conn_data->continuous_sampling_flag = continuous_sampling_flag;
return;
}
@@ -262,7 +262,13 @@ static __always_inline int periodic_report(u64 ts_nsec, struct conn_data_t *conn
{
long err;
int ret = 0;
- u64 period = conn_data->report_period;
+
+ // period cannot be 0, so a zero value is taken to mean that user mode has not written to args_map yet;
+ // therefore we try to get the value again.
+ if (conn_data->report_period == 0)
+ get_args(conn_data);
+
+ u64 period = (conn_data->report_period != 0) ? conn_data->report_period : __PERIOD;
// 表示没有任何采样数据,不上报
if (conn_data->latency.rtt_nsec == 0) {
@@ -299,7 +305,7 @@ static __always_inline void sample_finished(struct conn_data_t *conn_data, struc
conn_data->latency.rtt_nsec = csd->rtt_ts_nsec;
__builtin_memcpy(&conn_data->latency.command, &csd->command, MAX_COMMAND_REQ_SIZE);
}
- if (conn_data->cycle_sampling_flag) {
+ if (conn_data->continuous_sampling_flag) {
if (conn_data->max.rtt_nsec < csd->rtt_ts_nsec) {
conn_data->max.rtt_nsec = csd->rtt_ts_nsec;
__builtin_memcpy(&conn_data->max.command, &csd->command, MAX_COMMAND_REQ_SIZE);
@@ -350,7 +356,7 @@ static __always_inline void process_rd_msg(u32 tgid, int fd, const char *buf, co
}
// 非循环采样每次上报后就返回等待下次上报周期再采样。这种方式无法获取周期内max sli
- if (!conn_data->cycle_sampling_flag && reported)
+ if (!conn_data->continuous_sampling_flag && reported)
return;
// 连接的协议类型未知时连续3次read报文时解析不出是redis协议就确认此条连接非redis请求连接不做采样
@@ -403,7 +409,7 @@ KPROBE(ksys_read, pt_regs)
if (conn_data->id.protocol == PROTOCOL_NO_REDIS)
return;
- if (!conn_data->cycle_sampling_flag) {
+ if (!conn_data->continuous_sampling_flag) {
if (bpf_ktime_get_ns() - conn_data->last_report_ts_nsec < conn_data->report_period)
return;
}
diff --git a/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.c b/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.c
index d8221c9..ac8484e 100644
--- a/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.c
+++ b/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.c
@@ -40,7 +40,7 @@
#define MAX_SLI_TBL_NAME "redis_max_sli"
static volatile sig_atomic_t stop;
-static struct probe_params params = {.period = DEFAULT_PERIOD, .cycle_sampling_flag = 0};
+static struct probe_params params = {.period = DEFAULT_PERIOD, .continuous_sampling_flag = 0};
static void sig_int(int signo)
{
@@ -113,7 +113,7 @@ static void msg_event_handler(void *ctx, int cpu, void *data, unsigned int size)
cli_ip_str,
ntohs(msg_evt_data->client_ip_info.port),
msg_evt_data->latency.rtt_nsec);
- if (params.cycle_sampling_flag) {
+ if (params.continuous_sampling_flag) {
fprintf(stdout,
"|%s|%d|%d|%s|%s|%s|%u|%s|%u|%llu|\n",
MAX_SLI_TBL_NAME,
@@ -173,7 +173,7 @@ static void load_args(int args_fd, struct probe_params* params)
struct ksli_args_s args = {0};
args.period = NS(params->period);
- args.cycle_sampling_flag = params->cycle_sampling_flag;
+ args.continuous_sampling_flag = params->continuous_sampling_flag;
(void)bpf_map_update_elem(args_fd, &key, &args, BPF_ANY);
}
@@ -187,7 +187,7 @@ int main(int argc, char **argv)
return -1;
}
printf("arg parse interval time:%us\n", params.period);
- printf("arg parse if cycle sampling:%s\n", params.cycle_sampling_flag ? "true": "false");
+ printf("arg parse if cycle sampling:%s\n", params.continuous_sampling_flag ? "true": "false");
#ifdef KERNEL_SUPPORT_TSTAMP
load_tc_bpf(params.netcard_list, TC_PROG, TC_TYPE_INGRESS);
diff --git a/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.h b/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.h
index 40d2398..023e0e7 100644
--- a/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.h
+++ b/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.h
@@ -36,7 +36,7 @@
struct ksli_args_s {
__u64 period; // Sampling period, unit ns
- char cycle_sampling_flag; // Enables the sampling of max sli within a period (which cause some performance degradation)
+ char continuous_sampling_flag; // Enables the sampling of max sli within a period (which cause some performance degradation)
};
enum msg_event_rw_t {
@@ -89,7 +89,7 @@ struct conn_data_t {
struct rtt_cmd_t current;
__u64 last_report_ts_nsec; // 上一次上报完成的时间点
__u64 report_period; // 上报周期
- char cycle_sampling_flag;
+ char continuous_sampling_flag;
char procotol_check_times;
};
--
2.33.0