- fix ksliprobe occasionally getting invalid args at startup - fix error message printed when starting gala-gopher - add system_uuid field to distinguish clients when posting to the pyroscope server - fix CPU usage spike caused by stackprobe - add support for pyroscope - bugfix: add check for whether thread is 0 - fix stackprobe memory allocation and deallocation errors - normalize time format in flamegraph svg filename (cherry picked from commit 6aef5cc8e4e2a34324c3f01663d2b61c0462f4ac)
205 lines
7.6 KiB
Diff
205 lines
7.6 KiB
Diff
From 8051b4711f4c93e6f6858847555ef0c4c4dd1db5 Mon Sep 17 00:00:00 2001
|
|
From: wo_cow <niuqianqian@huawei.com>
|
|
Date: Fri, 6 Jan 2023 17:18:07 +0800
|
|
Subject: [PATCH] repair stackprobe caused cpu rush
|
|
|
|
---
|
|
.../ebpf.probe/src/stackprobe/flame_graph.c | 32 +++++++++++--------
|
|
.../extends/ebpf.probe/src/stackprobe/stack.h | 4 +--
|
|
.../ebpf.probe/src/stackprobe/stackprobe.c | 16 +++++-----
|
|
.../ebpf.probe/src/stackprobe/stackprobe.h | 2 +-
|
|
4 files changed, 29 insertions(+), 25 deletions(-)
|
|
|
|
diff --git a/src/probes/extends/ebpf.probe/src/stackprobe/flame_graph.c b/src/probes/extends/ebpf.probe/src/stackprobe/flame_graph.c
|
|
index 126b98d..40c6dcb 100644
|
|
--- a/src/probes/extends/ebpf.probe/src/stackprobe/flame_graph.c
|
|
+++ b/src/probes/extends/ebpf.probe/src/stackprobe/flame_graph.c
|
|
@@ -38,6 +38,8 @@
|
|
#include "bpf.h"
|
|
#include "flame_graph.h"
|
|
|
|
+#define POST_MAX_LEN 131072
|
|
+
|
|
struct post_info_s {
|
|
int post_flag;
|
|
int sk;
|
|
@@ -193,7 +195,6 @@ static void __reopen_flame_graph_file(struct stack_svg_mng_s *svg_mng)
|
|
#define HISTO_TMP_LEN (2 * STACK_SYMBS_LEN)
|
|
static char __histo_tmp_str[HISTO_TMP_LEN];
|
|
|
|
-#define POST_MAX_LEN 2048
|
|
static int __do_wr_stack_histo(struct stack_svg_mng_s *svg_mng,
|
|
struct stack_trace_histo_s *stack_trace_histo, int first, struct post_info_s *post_info)
|
|
{
|
|
@@ -246,7 +247,7 @@ static int __build_url(char *url, struct post_server_s *post_server, int en_type
|
|
time_t now, before;
|
|
(void)time(&now);
|
|
if (post_server->last_post_ts == 0) {
|
|
- before = now - 60; // 60s
|
|
+ before = now - 30; // 30s
|
|
} else {
|
|
before = post_server->last_post_ts + 1;
|
|
}
|
|
@@ -267,12 +268,13 @@ static void __curl_post(struct post_server_s *post_server, struct post_info_s *p
|
|
CURLcode res;
|
|
CURL *curl = post_info->curl;
|
|
if (curl == NULL) {
|
|
- return;
|
|
+ goto end2;
|
|
}
|
|
+
|
|
long post_len = (long)strlen(post_info->buf_start);
|
|
if (post_len == 0) {
|
|
DEBUG("[FLAMEGRAPH]: buf is null. No need to curl post post to %s\n", appname[en_type]);
|
|
- return;
|
|
+ goto end1;
|
|
}
|
|
|
|
char url[LINE_BUF_LEN] = {0};
|
|
@@ -306,26 +308,28 @@ static void __curl_post(struct post_server_s *post_server, struct post_info_s *p
|
|
res = curl_easy_perform(curl);
|
|
/* Check for errors */
|
|
if(res != CURLE_OK) {
|
|
- ERROR("[FLAMEGRAPH]: curl post failed: %s\n", curl_easy_strerror(res));
|
|
+ ERROR("[FLAMEGRAPH]: curl post to %s failed: %s\n", url, curl_easy_strerror(res));
|
|
} else {
|
|
- INFO("[FLAMEGRAPH]: curl post post to %s success\n", appname[en_type]);
|
|
+ INFO("[FLAMEGRAPH]: curl post post to %s success\n", url, post_info->remain_size);
|
|
}
|
|
|
|
- /* always cleanup */
|
|
- curl_easy_cleanup(curl);
|
|
if (chunk.memory) {
|
|
free(chunk.memory);
|
|
- chunk.memory = NULL;
|
|
}
|
|
- free(post_info->buf_start);
|
|
- post_info->buf_start = NULL;
|
|
-
|
|
+end1:
|
|
+ /* always cleanup */
|
|
+ curl_easy_cleanup(curl);
|
|
+end2:
|
|
+ if (post_info->buf_start != NULL) {
|
|
+ free(post_info->buf_start);
|
|
+ post_info->buf_start = NULL;
|
|
+ }
|
|
return;
|
|
}
|
|
|
|
static void __init_curl_handle(struct post_server_s *post_server, struct post_info_s *post_info)
|
|
{
|
|
- if (post_server == NULL || post_server->post_flag == 0) {
|
|
+ if (post_server == NULL || post_server->post_enable == 0) {
|
|
return;
|
|
}
|
|
|
|
@@ -419,7 +423,7 @@ int set_post_server(struct post_server_s *post_server, const char *server_str)
|
|
}
|
|
|
|
curl_global_init(CURL_GLOBAL_ALL);
|
|
- post_server->post_flag = 1;
|
|
+ post_server->post_enable = 1;
|
|
post_server->timeout = 3;
|
|
(void)strcpy(post_server->host, server_str);
|
|
|
|
diff --git a/src/probes/extends/ebpf.probe/src/stackprobe/stack.h b/src/probes/extends/ebpf.probe/src/stackprobe/stack.h
|
|
index 5a932b1..976ff81 100644
|
|
--- a/src/probes/extends/ebpf.probe/src/stackprobe/stack.h
|
|
+++ b/src/probes/extends/ebpf.probe/src/stackprobe/stack.h
|
|
@@ -19,10 +19,10 @@
|
|
|
|
#include "common.h"
|
|
|
|
-#define AGGRE_PERIOD (1 * 60 * 1000) // 1min
|
|
+#define AGGRE_PERIOD (1 * 30 * 1000) // 30s
|
|
#define SAMPLE_PERIOD (10) // 10ms
|
|
#define TMOUT_PERIOD (AGGRE_PERIOD / 1000) // Second as unit
|
|
-#define PROC_CACHE_MAX_COUNT 10 // Cache 10 proc symbols
|
|
+#define PROC_CACHE_MAX_COUNT 100 // Cache 100 proc symbols
|
|
#define DIV_ROUND_UP(NUM, DEN) ((NUM + DEN - 1) / DEN)
|
|
|
|
#define PERCPU_SAMPLE_COUNT (2 * DIV_ROUND_UP(AGGRE_PERIOD, SAMPLE_PERIOD))
|
|
diff --git a/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.c b/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.c
|
|
index a4733e6..9fa5079 100644
|
|
--- a/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.c
|
|
+++ b/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.c
|
|
@@ -753,7 +753,7 @@ static void destroy_stack_trace(struct stack_trace_s **ptr_st)
|
|
return;
|
|
}
|
|
|
|
- if (st->post_server.post_flag) {
|
|
+ if (st->post_server.post_enable) {
|
|
clean_post_server();
|
|
}
|
|
|
|
@@ -849,7 +849,7 @@ static struct stack_trace_s *create_stack_trace(StackprobeConfig *conf)
|
|
|
|
if (set_post_server(&st->post_server, conf->generalConfig->pyroscopeServer) != 0) {
|
|
INFO("[STACKPROBE]: Do not post to Pyroscope Server.\n");
|
|
- st->post_server.post_flag = 0;
|
|
+ st->post_server.post_enable = 0;
|
|
} else {
|
|
INFO("[STACKPROBE]: Will post to Pyroscope Server: %s.\n", conf->generalConfig->pyroscopeServer);
|
|
}
|
|
@@ -1085,9 +1085,9 @@ static int add_pids()
|
|
// find_bpf_link and add_bpf_link will set bpf_link status
|
|
if (!find_bpf_link(pid)) {
|
|
if (add_bpf_link(pid) != 0) {
|
|
- fprintf(stderr, "add pid %u failed\n", pid);
|
|
+ ERROR("[STACKPROBE]: add pid %u failed\n", pid);
|
|
} else {
|
|
- printf("add of pid %u success\n", pid);
|
|
+ INFO("[STACKPROBE]: add of pid %u success\n", pid);
|
|
}
|
|
}
|
|
}
|
|
@@ -1103,7 +1103,7 @@ static void clear_invalid_pids()
|
|
}
|
|
H_ITER(bpf_link_head, pid_bpf_links, tmp) {
|
|
if (pid_bpf_links->v.pid_state == PID_NOEXIST) {
|
|
- printf("clear bpf link of pid %u\n", pid_bpf_links->pid);
|
|
+ INFO("[STACKPROBE]: clear bpf link of pid %u\n", pid_bpf_links->pid);
|
|
H_DEL(bpf_link_head, pid_bpf_links);
|
|
(void)free(pid_bpf_links);
|
|
}
|
|
@@ -1344,11 +1344,11 @@ static void init_wr_flame_pthreads(struct svg_stack_trace_s *svg_st, const char
|
|
|
|
err = pthread_create(&wr_flame_thd, NULL, __running, (void *)svg_st);
|
|
if (err != 0) {
|
|
- fprintf(stderr, "Failed to create %s wr_flame_pthread.\n", flame_name);
|
|
+ ERROR("[STACKPROBE]: Failed to create %s wr_flame_pthread.\n", flame_name);
|
|
g_stop = 1;
|
|
return;
|
|
}
|
|
- printf("%s wr_flame_pthread successfully started!\n", flame_name);
|
|
+ INFO("[STACKPROBE]: %s wr_flame_pthread successfully started!\n", flame_name);
|
|
|
|
return;
|
|
}
|
|
@@ -1410,7 +1410,7 @@ int main(int argc, char **argv)
|
|
StackprobeConfig *conf;
|
|
|
|
if (signal(SIGINT, sig_int) == SIG_ERR) {
|
|
- fprintf(stderr, "can't set signal handler: %d\n", errno);
|
|
+ ERROR("[STACKPROBE]: can't set signal handler: %d\n", errno);
|
|
return errno;
|
|
}
|
|
|
|
diff --git a/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.h b/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.h
|
|
index 657b6e7..97e5ea5 100644
|
|
--- a/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.h
|
|
+++ b/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.h
|
|
@@ -103,7 +103,7 @@ struct svg_stack_trace_s {
|
|
};
|
|
|
|
struct post_server_s {
|
|
- char post_flag;
|
|
+ char post_enable;
|
|
long timeout; // sec
|
|
char host[PATH_LEN];
|
|
time_t last_post_ts;
|
|
--
|
|
2.33.0
|
|
|