From 61431063cc0d52d4cee266cf5af6caccb6bc7803 Mon Sep 17 00:00:00 2001 From: wo_cow Date: Mon, 12 Dec 2022 16:47:57 +0800 Subject: [PATCH] add configuration instructions --- doc/conf_introduction.md | 70 +++++++++++++++++++ src/common/util.c | 8 +-- .../cadvisor.probe/cadvisor_probe.conf | 4 +- .../cadvisor.probe/cadvisor_probe.meta | 4 +- .../python.probe/cadvisor.probe/readme.md | 49 +++++++++++++ .../python.probe/pg_stat.probe/readme.md | 23 ++++++ 6 files changed, 150 insertions(+), 8 deletions(-) create mode 100644 src/probes/extends/python.probe/cadvisor.probe/readme.md create mode 100644 src/probes/extends/python.probe/pg_stat.probe/readme.md diff --git a/doc/conf_introduction.md b/doc/conf_introduction.md index ce15a92..a35a70a 100644 --- a/doc/conf_introduction.md +++ b/doc/conf_introduction.md @@ -5,10 +5,20 @@ gala-gopher启动必须的外部参数通过配置文件`gala-gopher.conf`定义 gala-gopher支持用户配置观测的应用范围,即支持用户设置关注的、需要监测的具体应用,此项配置是在`gala-gopher-app.conf`配置文件中配置。 +部分extend探针有自己的配置文件,开启该探针前需要设置好探针的配置文件。 + ## 配置介绍 配置文件归档在[config目录](../config)。 +extend探针配置文件归档在探针同级目录下。目前有配置文件的探针有: + +[stackprobe](../src/probes/extends/ebpf.probe/src/stackprobe) + +[cadvisor.probe](../src/probes/extends/python.probe/cadvisor.probe) + +[pg_stat.probe](../src/probes/extends/python.probe/pg_stat.probe) + ### gala-gopher.conf `gala-gopher.conf`文件的安装路径为 `/opt/gala-gopher/gala-gopher.conf`。该文件配置项说明如下: @@ -84,6 +94,66 @@ gala-gopher支持用户配置观测的应用范围,即支持用户设置关注 配置示例参见 [gala-gopher-app.conf示例](#gala-gopher-app.conf示例) 。 +### stackprobe.conf + +`stackprobe.conf`文件的安装路径为 `/opt/gala-gopher/extend_probes/stackprobe.conf`。该文件配置项说明如下: + +- general:通用设置 + - period:火焰图生成周期 + - log_dir:stackprobe探针日志路径 + - svg_dir:svg格式火焰图存储路径 + - flame_dir:堆栈信息存储路径 + - debug_dir:调试信息文件路径 +- flame_name:各类型火焰图开关 + - oncpu:oncpu火焰图开关 + - offcpu:offcpu火焰图开关 + - io:io火焰图开关 + - memleak:内存泄漏火焰图开关 +- application:暂未使用 + + +### cadvisor_probe.conf + +`cadvisor_probe.conf`文件的安装路径为 `/opt/gala-gopher/extend_probes/cadvisor_probe.conf`。该文件配置项说明如下: + +- 
version:配置文件版本号 +- measurements:待集成到gala-gopher的观测指标 + - table_name: 数据表名称 + - entity_name: 观测对象名称 + - fields:数据字段 + - description:数据字段描述信息 + - type:数据字段类型,需和cAdvisor上报数据类型一致 + - name:数据字段名称,需和cAdvisor上报数据名称一致 + +> 说明:cadvisor_probe.conf和cadvisor_probe.meta的字段需要一致。例外:若conf中type字段为counter,在meta中对应type字段应为gauge + + +### pg_stat_probe.conf + +`pg_stat_probe.conf`文件的安装路径为 `/opt/gala-gopher/extend_probes/pg_stat_probe.conf`。该文件配置项说明如下: + +- servers:PostgreSQL服务端配置 + - ip:服务端IP + - port:服务端端口 + - dbname:服务端任意数据库名称 + - user:用户名 + - password:用户密码 + +上述配置用户需能够访问pg_stat_database视图,配置最小权限的命令如下: + +PostgreSQL: +```shell +grant SELECT ON pg_stat_database to <USER>; +grant pg_monitor to <USER>; +``` + +GaussDB: +```shell +grant usage on schema dbe_perf to <USER>; +grant select on pg_stat_replication to <USER>; +``` + + ## 启动参数介绍 diff --git a/src/common/util.c b/src/common/util.c index 1575546..e25e9ee 100644 --- a/src/common/util.c +++ b/src/common/util.c @@ -24,7 +24,7 @@ char *get_cur_date(void) { - /* return date str, ex: 2021/5/17 */ + /* return date str, ex: 2021-05-17 */ static char tm[TM_STR_LEN] = {0}; struct tm *tmp_ptr = NULL; time_t t; @@ -34,7 +34,7 @@ char *get_cur_date(void) tmp_ptr = localtime(&t); (void)snprintf(tm, TM_STR_LEN, - "%d-%d-%d", + "%d-%02d-%02d", (1900 + tmp_ptr->tm_year), (1 + tmp_ptr->tm_mon), tmp_ptr->tm_mday); @@ -43,7 +43,7 @@ char *get_cur_date(void) char *get_cur_time(void) { - /* return time str, ex: 2021/5/17 19:56:03 */ + /* return time str, ex: 2021-05-17-19-56-03 */ static char tm[TM_STR_LEN] = {0}; struct tm *tmp_ptr = NULL; time_t t; @@ -53,7 +53,7 @@ char *get_cur_time(void) tmp_ptr = localtime(&t); (void)snprintf(tm, TM_STR_LEN, - "%d-%d-%d-%02d-%02d-%02d", + "%d-%02d-%02d-%02d-%02d-%02d", (1900 + tmp_ptr->tm_year), (1 + tmp_ptr->tm_mon), tmp_ptr->tm_mday, diff --git a/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.conf b/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.conf index 215bb70..3027d4f 100644 ---
a/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.conf +++ b/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.conf @@ -189,12 +189,12 @@ measurements: name: "container_id", }, { - description: "...", + description: "failure type", type: "label", name: "failure_type", }, { - description: "...", + description: "scope", type: "label", name: "scope", }, diff --git a/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.meta b/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.meta index 598d585..178c750 100644 --- a/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.meta +++ b/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.meta @@ -189,12 +189,12 @@ measurements: name: "container_id", }, { - description: "...", + description: "failure type", type: "label", name: "failure_type", }, { - description: "...", + description: "scope", type: "label", name: "scope", }, diff --git a/src/probes/extends/python.probe/cadvisor.probe/readme.md b/src/probes/extends/python.probe/cadvisor.probe/readme.md new file mode 100644 index 0000000..62a5532 --- /dev/null +++ b/src/probes/extends/python.probe/cadvisor.probe/readme.md @@ -0,0 +1,49 @@ +# cadvisor 探针开发说明 + +## 功能描述 + +集成容器性能分析工具[cAdvisor](https://github.com/google/cadvisor)的统计数据。支持的功能有: + +- 设置cAdvisor监听端口(必需) + + 通过-p参数设置,无默认值,示例: + + `python3 cadvisor_probe.py -p 8080` + + 表示监控cAdvisor输出,若cAdvisor未启动,则通过`cadvisor -port 8080`启动cAdvisor + +- 设置观测周期 + + 通过-d参数设置,单位为秒,默认值5,示例: + + `python3 cadvisor_probe.py -p 8080 -d 5` + + 表示每隔5s输出统计信息 + +- 开启观测白名单 + + 通过-F参数设置,配置为`task`表示按照`gala-gopher-app.conf`过滤,配置为具体进程的pid表示仅监控此进程,不配置则观测所有进程,默认不配置,示例: + + `python3 cadvisor_probe.py -p 8080 -F task` + + 表示只观测`gala-gopher-app.conf`中的进程 + + `python3 cadvisor_probe.py -p 8080 -F 1234` + + 表示只观测pid为1234的进程 + +- 设置容器观测指标 + + 通过cadvisor_probe.conf和cadvisor_probe.meta配置,二者需对应。配置方法详见[conf_introduction.md](../../../../../doc/conf_introduction.md#cadvisor_probe.conf) + +- 
容器运行信息监控,具体的观测指标信息参见`cadvisor_probe.meta`。 + +## 采集方案 + +拉起cAdvisor进程,并监控[cAdvisor原始Prometheus统计数据](https://github.com/google/cadvisor/blob/master/docs/storage/prometheus.md), +采集cadvisor_probe.conf中配置的统计项,将数据格式转换后按照cadvisor_probe.meta输出为gala-gopher框架支持的格式。 + +## 约束条件 + +- 需要预先安装cAdvisor + diff --git a/src/probes/extends/python.probe/pg_stat.probe/readme.md b/src/probes/extends/python.probe/pg_stat.probe/readme.md new file mode 100644 index 0000000..1ddd6b7 --- /dev/null +++ b/src/probes/extends/python.probe/pg_stat.probe/readme.md @@ -0,0 +1,23 @@ +# pg_stat 探针开发说明 + +## 功能描述 + +获取PostgreSQL Server的TPS统计数据。支持的功能有: + +- 设置被观测服务端信息 + + 通过pg_stat_probe.conf设置,支持多服务端,配置方法详见[conf_introduction.md](../../../../../doc/conf_introduction.md#pg_stat_probe.conf) + +- 设置观测周期 + + 通过-d参数设置,单位为秒,默认值5,示例: + + `python3 pg_stat_probe.py -d 5` + + 表示每隔5s输出统计信息 + +- 观测PostgreSQL Server中各数据库的TPS统计数据,具体的观测指标信息参见`pg_stat_probe.meta` + +## 采集方案 + +通过计算数据库已提交的事务数在单位时间内的增长来计算TPS -- 2.33.0