- fix ksliprobe get invalid args occasionally at startup - fix error print when starting gala-gopher - add system_uuid field to distinguish client when post to pyroscope server - repair stackprobe caused cpu rush - add support to pyroscope - bugfix: add check if thread is 0 - fix stackprobe memory allocation and deallocation errors - normalize time format in flamegraph svg filename (cherry picked from commit 6aef5cc8e4e2a34324c3f01663d2b61c0462f4ac)
274 lines
9.4 KiB
Diff
274 lines
9.4 KiB
Diff
From 61431063cc0d52d4cee266cf5af6caccb6bc7803 Mon Sep 17 00:00:00 2001
|
||
From: wo_cow <niuqianqian@huawei.com>
|
||
Date: Mon, 12 Dec 2022 16:47:57 +0800
|
||
Subject: [PATCH] add configuration instructions
|
||
|
||
---
|
||
doc/conf_introduction.md | 70 +++++++++++++++++++
|
||
src/common/util.c | 8 +--
|
||
.../cadvisor.probe/cadvisor_probe.conf | 4 +-
|
||
.../cadvisor.probe/cadvisor_probe.meta | 4 +-
|
||
.../python.probe/cadvisor.probe/readme.md | 49 +++++++++++++
|
||
.../python.probe/pg_stat.probe/readme.md | 23 ++++++
|
||
6 files changed, 150 insertions(+), 8 deletions(-)
|
||
create mode 100644 src/probes/extends/python.probe/cadvisor.probe/readme.md
|
||
create mode 100644 src/probes/extends/python.probe/pg_stat.probe/readme.md
|
||
|
||
diff --git a/doc/conf_introduction.md b/doc/conf_introduction.md
|
||
index ce15a92..a35a70a 100644
|
||
--- a/doc/conf_introduction.md
|
||
+++ b/doc/conf_introduction.md
|
||
@@ -5,10 +5,20 @@ gala-gopher启动必须的外部参数通过配置文件`gala-gopher.conf`定义
|
||
|
||
gala-gopher支持用户配置观测的应用范围,即支持用户设置关注的、需要监测的具体应用,此项配置是在`gala-gopher-app.conf`配置文件中配置。
|
||
|
||
+部分extend探针有自己的配置文件,开启该探针前需要设置好探针的配置文件。
|
||
+
|
||
## 配置介绍
|
||
|
||
配置文件归档在[config目录](../config)。
|
||
|
||
+extend探针配置文件归档在探针同级目录下。目前有配置文件的探针有:
|
||
+
|
||
+[stackprobe](../src/probes/extends/ebpf.probe/src/stackprobe)
|
||
+
|
||
+[cadvisor.probe](../src/probes/extends/python.probe/cadvisor.probe)
|
||
+
|
||
+[pg_stat.probe](../src/probes/extends/python.probe/pg_stat.probe)
|
||
+
|
||
### gala-gopher.conf
|
||
|
||
`gala-gopher.conf`文件的安装路径为 `/opt/gala-gopher/gala-gopher.conf`。该文件配置项说明如下:
|
||
@@ -84,6 +94,66 @@ gala-gopher支持用户配置观测的应用范围,即支持用户设置关注
|
||
配置示例参见 [gala-gopher-app.conf示例](#gala-gopher-app.conf示例) 。
|
||
|
||
|
||
+### stackprobe.conf
|
||
+
|
||
+`stackprobe.conf`文件的安装路径为 `/opt/gala-gopher/extend_probes/stackprobe.conf`。该文件配置项说明如下:
|
||
+
|
||
+- general:通用设置
|
||
+ - period:火焰图生成周期
|
||
+ - log_dir:stackprobe探针日志路径
|
||
+ - svg_dir:svg格式火焰图存储路径
|
||
+ - flame_dir:堆栈信息存储路径
|
||
+ - debug_dir:调试信息文件路径
|
||
+- flame_name:各类型火焰图开关
|
||
+ - oncpu:oncpu火焰图开关
|
||
+ - offcpu:offcpu火焰图开关
|
||
+ - io:io火焰图开关
|
||
+ - memleak:内存泄漏火焰图开关
|
||
+- application:暂未使用
|
||
+
|
||
+
|
||
+### cadvisor_probe.conf
|
||
+
|
||
+`cadvisor_probe.conf`文件的安装路径为 `/opt/gala-gopher/extend_probes/cadvisor_probe.conf`。该文件配置项说明如下:
|
||
+
|
||
+- version:配置文件版本号
|
||
+- measurements:待集成到gala-gopher的观测指标
|
||
+ - table_name: 数据表名称
|
||
+ - entity_name: 观测对象名称
|
||
+ - fields:数据字段
|
||
+ - description:数据字段描述信息
|
||
+ - type:数据字段类型,需和cAdvisor上报数据类型一致
|
||
+ - name:数据字段名称,需和cAdvisor上报数据名称一致
|
||
+
|
||
+> 说明:cadvisor_probe.conf和cadvisor_probe.meta的字段需要一致。例外:若conf中type字段为counter,在meta中对应type字段应为gauge
|
||
+
|
||
+
|
||
+### pg_stat_probe.conf
|
||
+
|
||
+`pg_stat_probe.conf`文件的安装路径为 `/opt/gala-gopher/extend_probes/pg_stat_probe.conf`。该文件配置项说明如下:
|
||
+
|
||
+- servers:PostgreSQL服务端配置
|
||
+ - ip:服务端IP
|
||
+ - port:服务端端口
|
||
+ - dbname:服务端任意数据库名称
|
||
+ - user:用户名
|
||
+ - password:用户密码
|
||
+
|
||
+上述配置用户需能够访问pg_stat_database视图,配置最小权限的命令如下:
|
||
+
|
||
+PostgreSQL:
|
||
+```shell
|
||
+grant SELECT ON pg_stat_database to <USER>;
|
||
+grant pg_monitor to <USER>;
|
||
+```
|
||
+
|
||
+GaussDB:
|
||
+```shell
|
||
+grant usage on schema dbe_perf to <USER>;
|
||
+grant select on pg_stat_replication to <USER>;
|
||
+```
|
||
+
|
||
+
|
||
|
||
## 启动参数介绍
|
||
|
||
diff --git a/src/common/util.c b/src/common/util.c
|
||
index 1575546..e25e9ee 100644
|
||
--- a/src/common/util.c
|
||
+++ b/src/common/util.c
|
||
@@ -24,7 +24,7 @@
|
||
|
||
char *get_cur_date(void)
|
||
{
|
||
- /* return date str, ex: 2021/5/17 */
|
||
+ /* return date str, ex: 2021-05-17 */
|
||
static char tm[TM_STR_LEN] = {0};
|
||
struct tm *tmp_ptr = NULL;
|
||
time_t t;
|
||
@@ -34,7 +34,7 @@ char *get_cur_date(void)
|
||
tmp_ptr = localtime(&t);
|
||
(void)snprintf(tm,
|
||
TM_STR_LEN,
|
||
- "%d-%d-%d",
|
||
+ "%d-%02d-%02d",
|
||
(1900 + tmp_ptr->tm_year),
|
||
(1 + tmp_ptr->tm_mon),
|
||
tmp_ptr->tm_mday);
|
||
@@ -43,7 +43,7 @@ char *get_cur_date(void)
|
||
|
||
char *get_cur_time(void)
|
||
{
|
||
- /* return time str, ex: 2021/5/17 19:56:03 */
|
||
+ /* return time str, ex: 2021-05-17-19-56-03 */
|
||
static char tm[TM_STR_LEN] = {0};
|
||
struct tm *tmp_ptr = NULL;
|
||
time_t t;
|
||
@@ -53,7 +53,7 @@ char *get_cur_time(void)
|
||
tmp_ptr = localtime(&t);
|
||
(void)snprintf(tm,
|
||
TM_STR_LEN,
|
||
- "%d-%d-%d-%02d-%02d-%02d",
|
||
+ "%d-%02d-%02d-%02d-%02d-%02d",
|
||
(1900 + tmp_ptr->tm_year),
|
||
(1 + tmp_ptr->tm_mon),
|
||
tmp_ptr->tm_mday,
|
||
diff --git a/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.conf b/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.conf
|
||
index 215bb70..3027d4f 100644
|
||
--- a/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.conf
|
||
+++ b/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.conf
|
||
@@ -189,12 +189,12 @@ measurements:
|
||
name: "container_id",
|
||
},
|
||
{
|
||
- description: "...",
|
||
+ description: "failure type",
|
||
type: "label",
|
||
name: "failure_type",
|
||
},
|
||
{
|
||
- description: "...",
|
||
+ description: "scope",
|
||
type: "label",
|
||
name: "scope",
|
||
},
|
||
diff --git a/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.meta b/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.meta
|
||
index 598d585..178c750 100644
|
||
--- a/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.meta
|
||
+++ b/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.meta
|
||
@@ -189,12 +189,12 @@ measurements:
|
||
name: "container_id",
|
||
},
|
||
{
|
||
- description: "...",
|
||
+ description: "failure type",
|
||
type: "label",
|
||
name: "failure_type",
|
||
},
|
||
{
|
||
- description: "...",
|
||
+ description: "scope",
|
||
type: "label",
|
||
name: "scope",
|
||
},
|
||
diff --git a/src/probes/extends/python.probe/cadvisor.probe/readme.md b/src/probes/extends/python.probe/cadvisor.probe/readme.md
|
||
new file mode 100644
|
||
index 0000000..62a5532
|
||
--- /dev/null
|
||
+++ b/src/probes/extends/python.probe/cadvisor.probe/readme.md
|
||
@@ -0,0 +1,49 @@
|
||
+# cadvisor 探针开发说明
|
||
+
|
||
+## 功能描述
|
||
+
|
||
+集成容器性能分析工具[cAdvisor](https://github.com/google/cadvisor)的统计数据。支持的功能有:
|
||
+
|
||
+- 设置cAdvisor监听端口(必需)
|
||
+
|
||
+ 通过-p参数设置,无默认值,示例:
|
||
+
|
||
+ `python3 cadvisor_probe.py -p 8080`
|
||
+
|
||
+ 表示监控cAdvisor输出,若cAdvisor未启动,则通过`cadvisor -port 8080`启动cAdvisor
|
||
+
|
||
+- 设置观测周期
|
||
+
|
||
+ 通过-d参数设置,单位为秒,默认值5,示例:
|
||
+
|
||
+ `python3 cadvisor_probe.py -p 8080 -d 5`
|
||
+
|
||
+ 表示每隔5s输出统计信息
|
||
+
|
||
+- 开启观测白名单
|
||
+
|
||
+ 通过-F参数设置,配置为`task`表示按照`gala-gopher-app.conf`过滤,配置为具体进程的pid表示仅监控此进程,不配置则观测所有进程,默认不配置,示例:
|
||
+
|
||
+ `python3 cadvisor_probe.py -p 8080 -F task`
|
||
+
|
||
+ 表示只观测`gala-gopher-app.conf`中的进程
|
||
+
|
||
+ `python3 cadvisor_probe.py -p 8080 -F 1234`
|
||
+
|
||
+ 表示只观测pid为1234的进程
|
||
+
|
||
+- 设置容器观测指标
|
||
+
|
||
+ 通过cadvisor_probe.conf和cadvisor_probe.meta配置,二者需对应。配置方法详见[conf_introduction.md](../../../../../doc/conf_introduction.md#cadvisor_probe.conf)
|
||
+
|
||
+- 容器运行信息监控,具体的观测指标信息参见`cadvisor_probe.meta`。
|
||
+
|
||
+## 采集方案
|
||
+
|
||
+拉起cAdvisor进程,并监控[cAdvisor原始Prometheus统计数据](https://github.com/google/cadvisor/blob/master/docs/storage/prometheus.md),
|
||
+采集cadvisor_probe.conf中配置的统计项,将数据格式转换后按照cadvisor_probe.meta输出为gala-gopher框架支持的格式。
|
||
+
|
||
+## 约束条件
|
||
+
|
||
+- 需要预先安装cAdvisor
|
||
+
|
||
diff --git a/src/probes/extends/python.probe/pg_stat.probe/readme.md b/src/probes/extends/python.probe/pg_stat.probe/readme.md
|
||
new file mode 100644
|
||
index 0000000..1ddd6b7
|
||
--- /dev/null
|
||
+++ b/src/probes/extends/python.probe/pg_stat.probe/readme.md
|
||
@@ -0,0 +1,23 @@
|
||
+# pg_stat 探针开发说明
|
||
+
|
||
+## 功能描述
|
||
+
|
||
+获取PostgreSQL Server的TPS统计数据。支持的功能有:
|
||
+
|
||
+- 设置被观测服务端信息
|
||
+
|
||
+ 通过pg_stat_probe.conf设置,支持多服务端,配置方法详见[conf_introduction.md](../../../../../doc/conf_introduction.md#pg_stat_probe.conf)
|
||
+
|
||
+- 设置观测周期
|
||
+
|
||
+ 通过-d参数设置,单位为秒,默认值5,示例:
|
||
+
|
||
+ `python3 pg_stat_probe.py -d 5`
|
||
+
|
||
+ 表示每隔5s输出统计信息
|
||
+
|
||
+- 观测PostgreSQL Server中各数据库的TPS统计数据,具体的观测指标信息参见`pg_stat_probe.meta`
|
||
+
|
||
+## 采集方案
|
||
+
|
||
+通过计算数据库已提交的事务数在单位时间内的增长来计算TPS
|
||
--
|
||
2.33.0
|
||
|