!26 Upgrade anteater version to 1.1.0
From: @li-zhenxing2022 Reviewed-by: @Vchanger Signed-off-by: @Vchanger
This commit is contained in:
commit
3f7be9aee3
@ -1,478 +0,0 @@
|
|||||||
From ac1383471f72420e3320eb7c7999021f3658fb7d Mon Sep 17 00:00:00 2001
|
|
||||||
From: lizhenxing11 <lizhenxing11@huawei.com>
|
|
||||||
Date: Wed, 7 Dec 2022 16:59:15 +0800
|
|
||||||
Subject: [PATCH] Add disk throughput detector
|
|
||||||
|
|
||||||
add keywords
|
|
||||||
|
|
||||||
extract cause metric to the attributes
|
|
||||||
|
|
||||||
update template
|
|
||||||
---
|
|
||||||
anteater/config.py | 3 -
|
|
||||||
anteater/core/kpi.py | 1 +
|
|
||||||
anteater/main.py | 2 +
|
|
||||||
anteater/model/algorithms/three_sigma.py | 2 +-
|
|
||||||
anteater/module/base.py | 6 +-
|
|
||||||
anteater/module/sys/disk_throughput.py | 62 +++++++++++++
|
|
||||||
anteater/module/sys/proc_io_latency.py | 4 +-
|
|
||||||
anteater/source/anomaly_report.py | 3 +-
|
|
||||||
anteater/template/app_anomaly_template.py | 4 +-
|
|
||||||
anteater/template/sys_anomaly_template.py | 4 +-
|
|
||||||
anteater/template/template.py | 3 +-
|
|
||||||
anteater/utils/data_load.py | 2 +
|
|
||||||
config/module/app_sli_rtt.json | 3 +
|
|
||||||
config/module/disk_throughput.json | 92 +++++++++++++++++++
|
|
||||||
config/module/proc_io_latency.json | 3 +
|
|
||||||
config/module/sys_io_latency.json | 3 +
|
|
||||||
config/module/sys_tcp_establish.json | 3 +
|
|
||||||
.../module/sys_tcp_transmission_latency.json | 3 +
|
|
||||||
.../sys_tcp_transmission_throughput.json | 3 +
|
|
||||||
19 files changed, 193 insertions(+), 13 deletions(-)
|
|
||||||
create mode 100644 anteater/module/sys/disk_throughput.py
|
|
||||||
create mode 100644 config/module/disk_throughput.json
|
|
||||||
|
|
||||||
diff --git a/anteater/config.py b/anteater/config.py
|
|
||||||
index ea02702..e9ab557 100644
|
|
||||||
--- a/anteater/config.py
|
|
||||||
+++ b/anteater/config.py
|
|
||||||
@@ -81,9 +81,6 @@ class AnteaterConf:
|
|
||||||
"""Loads config from yaml file"""
|
|
||||||
data_path = os.path.realpath(data_path)
|
|
||||||
|
|
||||||
- if not os.path.exists(data_path):
|
|
||||||
- os.makedirs(data_path)
|
|
||||||
-
|
|
||||||
try:
|
|
||||||
with open(os.path.join(data_path, "config", self.filename), "rb") as f:
|
|
||||||
result = yaml.safe_load(f)
|
|
||||||
diff --git a/anteater/core/kpi.py b/anteater/core/kpi.py
|
|
||||||
index 5a9d8ab..3480139 100644
|
|
||||||
--- a/anteater/core/kpi.py
|
|
||||||
+++ b/anteater/core/kpi.py
|
|
||||||
@@ -48,6 +48,7 @@ class ModelConfig:
|
|
||||||
class JobConfig:
|
|
||||||
name: str
|
|
||||||
job_type: str
|
|
||||||
+ keywords: List[str]
|
|
||||||
root_cause_number: int
|
|
||||||
kpis: List[KPI]
|
|
||||||
features: List[Feature]
|
|
||||||
diff --git a/anteater/main.py b/anteater/main.py
|
|
||||||
index 11e0409..ba7be70 100644
|
|
||||||
--- a/anteater/main.py
|
|
||||||
+++ b/anteater/main.py
|
|
||||||
@@ -21,6 +21,7 @@ from apscheduler.schedulers.blocking import BlockingScheduler
|
|
||||||
from anteater.anomaly_detection import AnomalyDetection
|
|
||||||
from anteater.config import AnteaterConf
|
|
||||||
from anteater.module.app.app_sli_detector import APPSliDetector
|
|
||||||
+from anteater.module.sys.disk_throughput import DiskThroughputDetector
|
|
||||||
from anteater.module.sys.proc_io_latency import ProcIOLatencyDetector
|
|
||||||
from anteater.module.sys.sys_io_latency import SysIOLatencyDetector
|
|
||||||
from anteater.module.sys.tcp_establish import SysTcpEstablishDetector
|
|
||||||
@@ -57,6 +58,7 @@ def main():
|
|
||||||
SysTcpTransmissionLatencyDetector(loader, report),
|
|
||||||
SysIOLatencyDetector(loader, report),
|
|
||||||
ProcIOLatencyDetector(loader, report),
|
|
||||||
+ DiskThroughputDetector(loader, report),
|
|
||||||
]
|
|
||||||
else:
|
|
||||||
detectors = [
|
|
||||||
diff --git a/anteater/model/algorithms/three_sigma.py b/anteater/model/algorithms/three_sigma.py
|
|
||||||
index 457b606..49b9952 100644
|
|
||||||
--- a/anteater/model/algorithms/three_sigma.py
|
|
||||||
+++ b/anteater/model/algorithms/three_sigma.py
|
|
||||||
@@ -31,7 +31,7 @@ def three_sigma(values, obs_size, n=3, method="abs"):
|
|
||||||
elif method == 'min':
|
|
||||||
outlier = [val for val in obs_val if val < mean - n * std]
|
|
||||||
elif method == 'max':
|
|
||||||
- outlier = [val for val in obs_val if val > mean + 3 * std]
|
|
||||||
+ outlier = [val for val in obs_val if val > mean + n * std]
|
|
||||||
else:
|
|
||||||
raise ValueError(f'Unknown method {method}')
|
|
||||||
|
|
||||||
diff --git a/anteater/module/base.py b/anteater/module/base.py
|
|
||||||
index 7b5fc84..63436ac 100644
|
|
||||||
--- a/anteater/module/base.py
|
|
||||||
+++ b/anteater/module/base.py
|
|
||||||
@@ -48,14 +48,14 @@ class E2EDetector:
|
|
||||||
for detector in self.detectors:
|
|
||||||
anomalies = detector.execute(self.job_config)
|
|
||||||
for anomaly in anomalies:
|
|
||||||
- self.report(anomaly)
|
|
||||||
+ self.report(anomaly, self.job_config.keywords)
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def parse_cause_metrics(self, anomaly: Anomaly) -> List[Dict]:
|
|
||||||
"""Parses the cause metrics into the specific formats"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
- def report(self, anomaly: Anomaly):
|
|
||||||
+ def report(self, anomaly: Anomaly, keywords):
|
|
||||||
"""Parses the anomaly into a specific formats
|
|
||||||
based on the template and reports parsed results
|
|
||||||
"""
|
|
||||||
@@ -63,4 +63,4 @@ class E2EDetector:
|
|
||||||
timestamp = dt.utc_now()
|
|
||||||
template = self.template(timestamp, anomaly.machine_id,
|
|
||||||
anomaly.metric, anomaly.entity_name)
|
|
||||||
- self.reporter.sent_anomaly(anomaly, cause_metrics, template)
|
|
||||||
+ self.reporter.sent_anomaly(anomaly, cause_metrics, keywords, template)
|
|
||||||
diff --git a/anteater/module/sys/disk_throughput.py b/anteater/module/sys/disk_throughput.py
|
|
||||||
new file mode 100644
|
|
||||||
index 0000000..9a192fb
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/anteater/module/sys/disk_throughput.py
|
|
||||||
@@ -0,0 +1,62 @@
|
|
||||||
+#!/usr/bin/python3
|
|
||||||
+# ******************************************************************************
|
|
||||||
+# Copyright (c) 2022 Huawei Technologies Co., Ltd.
|
|
||||||
+# gala-anteater is licensed under Mulan PSL v2.
|
|
||||||
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
|
|
||||||
+# You may obtain a copy of Mulan PSL v2 at:
|
|
||||||
+# http://license.coscl.org.cn/MulanPSL2
|
|
||||||
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
|
||||||
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
|
||||||
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
|
||||||
+# See the Mulan PSL v2 for more details.
|
|
||||||
+# ******************************************************************************/
|
|
||||||
+
|
|
||||||
+from typing import List, Dict
|
|
||||||
+
|
|
||||||
+from anteater.core.anomaly import Anomaly
|
|
||||||
+from anteater.module.base import E2EDetector
|
|
||||||
+from anteater.model.detector.online_vae_detector import OnlineVAEDetector
|
|
||||||
+from anteater.model.detector.n_sigma_detector import NSigmaDetector
|
|
||||||
+from anteater.source.anomaly_report import AnomalyReport
|
|
||||||
+from anteater.source.metric_loader import MetricLoader
|
|
||||||
+from anteater.template.sys_anomaly_template import SysAnomalyTemplate
|
|
||||||
+
|
|
||||||
+
|
|
||||||
+class DiskThroughputDetector(E2EDetector):
|
|
||||||
+ """Disk throughput e2e detector which detects the disk read or write
|
|
||||||
+ await time performance deterioration
|
|
||||||
+ """
|
|
||||||
+
|
|
||||||
+ config_file = 'disk_throughput.json'
|
|
||||||
+
|
|
||||||
+ def __init__(self, data_loader: MetricLoader, reporter: AnomalyReport):
|
|
||||||
+ """The disk throughput e2e detector initializer"""
|
|
||||||
+ super().__init__(reporter, SysAnomalyTemplate)
|
|
||||||
+
|
|
||||||
+ self.detectors = self.init_detectors(data_loader)
|
|
||||||
+
|
|
||||||
+ def init_detectors(self, data_loader):
|
|
||||||
+ if self.job_config.model_config.enable:
|
|
||||||
+ detectors = [
|
|
||||||
+ NSigmaDetector(data_loader, method='max'),
|
|
||||||
+ OnlineVAEDetector(data_loader, self.job_config.model_config)
|
|
||||||
+ ]
|
|
||||||
+ else:
|
|
||||||
+ detectors = [
|
|
||||||
+ NSigmaDetector(data_loader, method='max')
|
|
||||||
+ ]
|
|
||||||
+
|
|
||||||
+ return detectors
|
|
||||||
+
|
|
||||||
+ def parse_cause_metrics(self, anomaly: Anomaly) -> List[Dict]:
|
|
||||||
+ """Parses the cause metrics into the specific formats"""
|
|
||||||
+ cause_metrics = [
|
|
||||||
+ {
|
|
||||||
+ 'metric': cause.ts.metric,
|
|
||||||
+ 'labels': cause.ts.labels,
|
|
||||||
+ 'score': cause.score,
|
|
||||||
+ 'description': cause.description.format(
|
|
||||||
+ cause.ts.labels.get('disk_name', ''))}
|
|
||||||
+ for cause in anomaly.root_causes]
|
|
||||||
+
|
|
||||||
+ return cause_metrics
|
|
||||||
diff --git a/anteater/module/sys/proc_io_latency.py b/anteater/module/sys/proc_io_latency.py
|
|
||||||
index 94fd05d..43e069f 100644
|
|
||||||
--- a/anteater/module/sys/proc_io_latency.py
|
|
||||||
+++ b/anteater/module/sys/proc_io_latency.py
|
|
||||||
@@ -38,12 +38,12 @@ class ProcIOLatencyDetector(E2EDetector):
|
|
||||||
def init_detectors(self, data_loader):
|
|
||||||
if self.job_config.model_config.enable:
|
|
||||||
detectors = [
|
|
||||||
- NSigmaDetector(data_loader, method='min'),
|
|
||||||
+ NSigmaDetector(data_loader, method='abs'),
|
|
||||||
OnlineVAEDetector(data_loader, self.job_config.model_config)
|
|
||||||
]
|
|
||||||
else:
|
|
||||||
detectors = [
|
|
||||||
- NSigmaDetector(data_loader, method='min')
|
|
||||||
+ NSigmaDetector(data_loader, method='abs')
|
|
||||||
]
|
|
||||||
|
|
||||||
return detectors
|
|
||||||
diff --git a/anteater/source/anomaly_report.py b/anteater/source/anomaly_report.py
|
|
||||||
index b226763..3d3bb09 100644
|
|
||||||
--- a/anteater/source/anomaly_report.py
|
|
||||||
+++ b/anteater/source/anomaly_report.py
|
|
||||||
@@ -42,7 +42,7 @@ class AnomalyReport:
|
|
||||||
|
|
||||||
return keys
|
|
||||||
|
|
||||||
- def sent_anomaly(self, anomaly: Anomaly, cause_metrics: List, template: Template):
|
|
||||||
+ def sent_anomaly(self, anomaly: Anomaly, cause_metrics: List, keywords: List[str], template: Template):
|
|
||||||
keys = self.get_keys(template.entity_name)
|
|
||||||
machine_id = template.machine_id
|
|
||||||
entity_name = template.entity_name
|
|
||||||
@@ -54,6 +54,7 @@ class AnomalyReport:
|
|
||||||
template.keys = keys
|
|
||||||
template.description = anomaly.description
|
|
||||||
template.cause_metrics = cause_metrics
|
|
||||||
+ template.keywords = keywords
|
|
||||||
|
|
||||||
msg = template.get_template()
|
|
||||||
self.provider.send_message(msg)
|
|
||||||
diff --git a/anteater/template/app_anomaly_template.py b/anteater/template/app_anomaly_template.py
|
|
||||||
index 5b8caf8..a509c96 100644
|
|
||||||
--- a/anteater/template/app_anomaly_template.py
|
|
||||||
+++ b/anteater/template/app_anomaly_template.py
|
|
||||||
@@ -31,7 +31,9 @@ class AppAnomalyTemplate(Template):
|
|
||||||
'entity_id': self.entity_id,
|
|
||||||
'event_id': f'{timestamp}_{self.entity_id}',
|
|
||||||
'event_type': 'app',
|
|
||||||
- 'event_source': 'gala-anteater'
|
|
||||||
+ 'event_source': 'gala-anteater',
|
|
||||||
+ 'keywords': self.keywords,
|
|
||||||
+ 'cause_metric': self.cause_metrics[0] if self.cause_metrics else {'description': 'Unknown'}
|
|
||||||
},
|
|
||||||
'Resource': {
|
|
||||||
'metric': self.metric,
|
|
||||||
diff --git a/anteater/template/sys_anomaly_template.py b/anteater/template/sys_anomaly_template.py
|
|
||||||
index 1083fb3..4ac6abb 100644
|
|
||||||
--- a/anteater/template/sys_anomaly_template.py
|
|
||||||
+++ b/anteater/template/sys_anomaly_template.py
|
|
||||||
@@ -31,7 +31,9 @@ class SysAnomalyTemplate(Template):
|
|
||||||
'entity_id': self.entity_id,
|
|
||||||
'event_id': f'{timestamp}_{self.entity_id}',
|
|
||||||
'event_type': 'sys',
|
|
||||||
- 'event_source': 'gala-anteater'
|
|
||||||
+ 'event_source': 'gala-anteater',
|
|
||||||
+ 'keywords': self.keywords,
|
|
||||||
+ 'cause_metric': self.cause_metrics[0] if self.cause_metrics else {'description': 'Unknown'}
|
|
||||||
},
|
|
||||||
'Resource': {
|
|
||||||
'metric': self.metric,
|
|
||||||
diff --git a/anteater/template/template.py b/anteater/template/template.py
|
|
||||||
index 9e4461a..794c121 100644
|
|
||||||
--- a/anteater/template/template.py
|
|
||||||
+++ b/anteater/template/template.py
|
|
||||||
@@ -26,7 +26,8 @@ class Template:
|
|
||||||
self.labels = {}
|
|
||||||
self.entity_id = ""
|
|
||||||
self.description = ""
|
|
||||||
- self.cause_metrics = {}
|
|
||||||
+ self.cause_metrics = []
|
|
||||||
+ self.keywords = []
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def get_template(self):
|
|
||||||
diff --git a/anteater/utils/data_load.py b/anteater/utils/data_load.py
|
|
||||||
index 6ac92c7..b6991c6 100644
|
|
||||||
--- a/anteater/utils/data_load.py
|
|
||||||
+++ b/anteater/utils/data_load.py
|
|
||||||
@@ -45,6 +45,7 @@ def load_job_config(file_name) -> JobConfig:
|
|
||||||
|
|
||||||
name = config['name']
|
|
||||||
job_type = config['job_type']
|
|
||||||
+ keywords = config['keywords']
|
|
||||||
root_cause_number = config['root_cause_number']
|
|
||||||
kpis = [KPI(**_conf) for _conf in config['KPI']]
|
|
||||||
features = [Feature(**_conf) for _conf in config['Features']]
|
|
||||||
@@ -74,6 +75,7 @@ def load_job_config(file_name) -> JobConfig:
|
|
||||||
return JobConfig(
|
|
||||||
name=name,
|
|
||||||
job_type=job_type,
|
|
||||||
+ keywords=keywords,
|
|
||||||
root_cause_number=root_cause_number,
|
|
||||||
kpis=kpis,
|
|
||||||
features=features,
|
|
||||||
diff --git a/config/module/app_sli_rtt.json b/config/module/app_sli_rtt.json
|
|
||||||
index 7c05094..db29392 100644
|
|
||||||
--- a/config/module/app_sli_rtt.json
|
|
||||||
+++ b/config/module/app_sli_rtt.json
|
|
||||||
@@ -1,6 +1,9 @@
|
|
||||||
{
|
|
||||||
"name": "app_sli_rtt",
|
|
||||||
"job_type": "app",
|
|
||||||
+ "keywords": [
|
|
||||||
+ "app"
|
|
||||||
+ ],
|
|
||||||
"root_cause_number": 20,
|
|
||||||
"KPI": [
|
|
||||||
{
|
|
||||||
diff --git a/config/module/disk_throughput.json b/config/module/disk_throughput.json
|
|
||||||
new file mode 100644
|
|
||||||
index 0000000..00276c0
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/config/module/disk_throughput.json
|
|
||||||
@@ -0,0 +1,92 @@
|
|
||||||
+{
|
|
||||||
+ "name": "disk_throughput",
|
|
||||||
+ "job_type": "sys",
|
|
||||||
+ "keywords": [
|
|
||||||
+ "disk"
|
|
||||||
+ ],
|
|
||||||
+ "root_cause_number": 1,
|
|
||||||
+ "KPI": [
|
|
||||||
+ {
|
|
||||||
+ "metric": "gala_gopher_disk_r_await",
|
|
||||||
+ "kpi_type": "",
|
|
||||||
+ "entity_name": "disk",
|
|
||||||
+ "enable": true,
|
|
||||||
+ "description": "Disk read await time is increasing!",
|
|
||||||
+ "params": {
|
|
||||||
+ "look_back": 20,
|
|
||||||
+ "obs_size": 25,
|
|
||||||
+ "outlier_ratio_th": 0.3,
|
|
||||||
+ "smooth_params": {
|
|
||||||
+ "method": "conv_smooth",
|
|
||||||
+ "box_pts": 3
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ },
|
|
||||||
+ {
|
|
||||||
+ "metric": "gala_gopher_disk_w_await",
|
|
||||||
+ "kpi_type": "",
|
|
||||||
+ "entity_name": "disk",
|
|
||||||
+ "enable": true,
|
|
||||||
+ "description": "Disk write await time is increasing!",
|
|
||||||
+ "params": {
|
|
||||||
+ "look_back": 20,
|
|
||||||
+ "obs_size": 25,
|
|
||||||
+ "outlier_ratio_th": 0.3,
|
|
||||||
+ "smooth_params": {
|
|
||||||
+ "method": "conv_smooth",
|
|
||||||
+ "box_pts": 3
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ ],
|
|
||||||
+ "OnlineModel": {
|
|
||||||
+ "name": "online_vae_model",
|
|
||||||
+ "enable": false,
|
|
||||||
+ "params": {
|
|
||||||
+ "th": 0.5,
|
|
||||||
+ "max_error_rate": 0.7,
|
|
||||||
+ "min_retrain_hours": 24,
|
|
||||||
+ "min_predict_minutes": 20,
|
|
||||||
+ "norm": {},
|
|
||||||
+ "vae": {
|
|
||||||
+ "hidden_sizes": [25, 10, 5],
|
|
||||||
+ "latent_size": 5,
|
|
||||||
+ "dropout_rate": 0.25,
|
|
||||||
+ "batch_size": 1024,
|
|
||||||
+ "num_epochs": 30,
|
|
||||||
+ "learning_rate": 0.001,
|
|
||||||
+ "k": 120,
|
|
||||||
+ "step_size": 60,
|
|
||||||
+ "num_eval_samples": 10
|
|
||||||
+ },
|
|
||||||
+ "calibrate": {},
|
|
||||||
+ "threshold": {}
|
|
||||||
+ }
|
|
||||||
+ },
|
|
||||||
+ "Features": [
|
|
||||||
+ {
|
|
||||||
+ "metric": "gala_gopher_disk_rspeed_kB",
|
|
||||||
+ "priority": 0,
|
|
||||||
+ "description": "The disk I/O await time performance deteriorates due to read throughput rise (read kbytes/second).(Disk = {})",
|
|
||||||
+ "atrend": "rise"
|
|
||||||
+ },
|
|
||||||
+ {
|
|
||||||
+ "metric": "gala_gopher_disk_wspeed_kB",
|
|
||||||
+ "priority": 0,
|
|
||||||
+ "description": "The disk I/O await time performance deteriorates due to write throughput rise (write kbytes/second).(Disk = {})",
|
|
||||||
+ "atrend": "rise"
|
|
||||||
+ },
|
|
||||||
+ {
|
|
||||||
+ "metric": "gala_gopher_disk_rareq",
|
|
||||||
+ "priority": 0,
|
|
||||||
+ "description": "The disk I/O await time performance deteriorates due to read saturation rise.(Disk = {})",
|
|
||||||
+ "atrend": "rise"
|
|
||||||
+ },
|
|
||||||
+ {
|
|
||||||
+ "metric": "gala_gopher_disk_wareq",
|
|
||||||
+ "priority": 0,
|
|
||||||
+ "description": "The disk I/O await time performance deteriorates due to write saturation rise.(Disk = {})",
|
|
||||||
+ "atrend": "rise"
|
|
||||||
+ }
|
|
||||||
+ ]
|
|
||||||
+}
|
|
||||||
\ No newline at end of file
|
|
||||||
diff --git a/config/module/proc_io_latency.json b/config/module/proc_io_latency.json
|
|
||||||
index c45b7df..c6c03c1 100644
|
|
||||||
--- a/config/module/proc_io_latency.json
|
|
||||||
+++ b/config/module/proc_io_latency.json
|
|
||||||
@@ -1,6 +1,9 @@
|
|
||||||
{
|
|
||||||
"name": "proc_io_latency",
|
|
||||||
"job_type": "sys",
|
|
||||||
+ "keywords": [
|
|
||||||
+ "process"
|
|
||||||
+ ],
|
|
||||||
"root_cause_number": 3,
|
|
||||||
"KPI": [
|
|
||||||
{
|
|
||||||
diff --git a/config/module/sys_io_latency.json b/config/module/sys_io_latency.json
|
|
||||||
index e92dd4c..e58990d 100644
|
|
||||||
--- a/config/module/sys_io_latency.json
|
|
||||||
+++ b/config/module/sys_io_latency.json
|
|
||||||
@@ -1,6 +1,9 @@
|
|
||||||
{
|
|
||||||
"name": "sys_io_latency",
|
|
||||||
"job_type": "sys",
|
|
||||||
+ "keywords": [
|
|
||||||
+ "block"
|
|
||||||
+ ],
|
|
||||||
"root_cause_number": 3,
|
|
||||||
"KPI": [
|
|
||||||
{
|
|
||||||
diff --git a/config/module/sys_tcp_establish.json b/config/module/sys_tcp_establish.json
|
|
||||||
index b6f8eb4..61ae72d 100644
|
|
||||||
--- a/config/module/sys_tcp_establish.json
|
|
||||||
+++ b/config/module/sys_tcp_establish.json
|
|
||||||
@@ -1,6 +1,9 @@
|
|
||||||
{
|
|
||||||
"name": "sys_tcp_establish",
|
|
||||||
"job_type": "sys",
|
|
||||||
+ "keywords": [
|
|
||||||
+ "tcp"
|
|
||||||
+ ],
|
|
||||||
"root_cause_number": 3,
|
|
||||||
"KPI": [
|
|
||||||
{
|
|
||||||
diff --git a/config/module/sys_tcp_transmission_latency.json b/config/module/sys_tcp_transmission_latency.json
|
|
||||||
index 4927d8e..d9e7f80 100644
|
|
||||||
--- a/config/module/sys_tcp_transmission_latency.json
|
|
||||||
+++ b/config/module/sys_tcp_transmission_latency.json
|
|
||||||
@@ -1,6 +1,9 @@
|
|
||||||
{
|
|
||||||
"name": "sys_tcp_transmission_latency",
|
|
||||||
"job_type": "sys",
|
|
||||||
+ "keywords": [
|
|
||||||
+ "tcp"
|
|
||||||
+ ],
|
|
||||||
"root_cause_number": 3,
|
|
||||||
"KPI": [
|
|
||||||
{
|
|
||||||
diff --git a/config/module/sys_tcp_transmission_throughput.json b/config/module/sys_tcp_transmission_throughput.json
|
|
||||||
index 060f640..28ee784 100644
|
|
||||||
--- a/config/module/sys_tcp_transmission_throughput.json
|
|
||||||
+++ b/config/module/sys_tcp_transmission_throughput.json
|
|
||||||
@@ -1,6 +1,9 @@
|
|
||||||
{
|
|
||||||
"name": "sys_tcp_transmission_throughput",
|
|
||||||
"job_type": "sys",
|
|
||||||
+ "keywords": [
|
|
||||||
+ "net"
|
|
||||||
+ ],
|
|
||||||
"root_cause_number": 3,
|
|
||||||
"KPI": [
|
|
||||||
{
|
|
||||||
--
|
|
||||||
2.33.0
|
|
||||||
|
|
||||||
@ -1,377 +0,0 @@
|
|||||||
From dd870b17120f3c7961c4613d454f1653fbd42214 Mon Sep 17 00:00:00 2001
|
|
||||||
From: lizhenxing11 <lizhenxing11@huawei.com>
|
|
||||||
Date: Tue, 27 Dec 2022 18:39:32 +0800
|
|
||||||
Subject: [PATCH] Update TCP Establish Model & Add Nic Loss Detector
|
|
||||||
|
|
||||||
change method 'abs' to 'max'
|
|
||||||
---
|
|
||||||
anteater/main.py | 2 +
|
|
||||||
anteater/model/algorithms/three_sigma.py | 4 +-
|
|
||||||
anteater/model/detector/n_sigma_detector.py | 4 +-
|
|
||||||
.../tcp_establish_n_sigma_detector.py | 12 +++-
|
|
||||||
anteater/model/detector/th_base_detector.py | 66 +++++++++++++++++++
|
|
||||||
anteater/module/sys/nic_loss.py | 59 +++++++++++++++++
|
|
||||||
anteater/module/sys/proc_io_latency.py | 4 +-
|
|
||||||
anteater/template/app_anomaly_template.py | 2 +
|
|
||||||
anteater/template/sys_anomaly_template.py | 1 +
|
|
||||||
config/module/sys_nic_loss.json | 53 +++++++++++++++
|
|
||||||
config/module/sys_tcp_establish.json | 3 +-
|
|
||||||
11 files changed, 200 insertions(+), 10 deletions(-)
|
|
||||||
create mode 100644 anteater/model/detector/th_base_detector.py
|
|
||||||
create mode 100644 anteater/module/sys/nic_loss.py
|
|
||||||
create mode 100644 config/module/sys_nic_loss.json
|
|
||||||
|
|
||||||
diff --git a/anteater/main.py b/anteater/main.py
|
|
||||||
index ba7be70..4de72f9 100644
|
|
||||||
--- a/anteater/main.py
|
|
||||||
+++ b/anteater/main.py
|
|
||||||
@@ -22,6 +22,7 @@ from anteater.anomaly_detection import AnomalyDetection
|
|
||||||
from anteater.config import AnteaterConf
|
|
||||||
from anteater.module.app.app_sli_detector import APPSliDetector
|
|
||||||
from anteater.module.sys.disk_throughput import DiskThroughputDetector
|
|
||||||
+from anteater.module.sys.nic_loss import NICLossDetector
|
|
||||||
from anteater.module.sys.proc_io_latency import ProcIOLatencyDetector
|
|
||||||
from anteater.module.sys.sys_io_latency import SysIOLatencyDetector
|
|
||||||
from anteater.module.sys.tcp_establish import SysTcpEstablishDetector
|
|
||||||
@@ -59,6 +60,7 @@ def main():
|
|
||||||
SysIOLatencyDetector(loader, report),
|
|
||||||
ProcIOLatencyDetector(loader, report),
|
|
||||||
DiskThroughputDetector(loader, report),
|
|
||||||
+ NICLossDetector(loader, report),
|
|
||||||
]
|
|
||||||
else:
|
|
||||||
detectors = [
|
|
||||||
diff --git a/anteater/model/algorithms/three_sigma.py b/anteater/model/algorithms/three_sigma.py
|
|
||||||
index 49b9952..0865417 100644
|
|
||||||
--- a/anteater/model/algorithms/three_sigma.py
|
|
||||||
+++ b/anteater/model/algorithms/three_sigma.py
|
|
||||||
@@ -14,8 +14,8 @@
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
|
|
||||||
-def three_sigma(values, obs_size, n=3, method="abs"):
|
|
||||||
- """The '3-sigma rule' outlier detect function"""
|
|
||||||
+def n_sigma(values, obs_size, n=3, method="abs"):
|
|
||||||
+ """The 'N-sigma rule' outlier detect function"""
|
|
||||||
if obs_size <= 0:
|
|
||||||
raise ValueError("The obs_size should great than zero!")
|
|
||||||
if len(values) <= obs_size:
|
|
||||||
diff --git a/anteater/model/detector/n_sigma_detector.py b/anteater/model/detector/n_sigma_detector.py
|
|
||||||
index f632326..3a2ab01 100644
|
|
||||||
--- a/anteater/model/detector/n_sigma_detector.py
|
|
||||||
+++ b/anteater/model/detector/n_sigma_detector.py
|
|
||||||
@@ -19,7 +19,7 @@ from anteater.core.kpi import KPI
|
|
||||||
from anteater.core.time_series import TimeSeriesScore
|
|
||||||
from anteater.model.detector.base import Detector
|
|
||||||
from anteater.model.algorithms.smooth import smoothing
|
|
||||||
-from anteater.model.algorithms.three_sigma import three_sigma
|
|
||||||
+from anteater.model.algorithms.three_sigma import n_sigma
|
|
||||||
from anteater.source.metric_loader import MetricLoader
|
|
||||||
from anteater.utils.common import divide
|
|
||||||
from anteater.utils.datetime import DateTimeManager as dt
|
|
||||||
@@ -91,7 +91,7 @@ class NSigmaDetector(Detector):
|
|
||||||
ratio = 0
|
|
||||||
else:
|
|
||||||
smoothed_val = smoothing(_ts.values, **smooth_params)
|
|
||||||
- outlier, mean, std = three_sigma(
|
|
||||||
+ outlier, mean, std = n_sigma(
|
|
||||||
smoothed_val, obs_size=obs_size, n=n, method=self.method)
|
|
||||||
ratio = divide(len(outlier), obs_size)
|
|
||||||
|
|
||||||
diff --git a/anteater/model/detector/tcp_establish_n_sigma_detector.py b/anteater/model/detector/tcp_establish_n_sigma_detector.py
|
|
||||||
index 8dcf9ae..82d7837 100644
|
|
||||||
--- a/anteater/model/detector/tcp_establish_n_sigma_detector.py
|
|
||||||
+++ b/anteater/model/detector/tcp_establish_n_sigma_detector.py
|
|
||||||
@@ -42,8 +42,13 @@ class TcpEstablishNSigmaDetector(Detector):
|
|
||||||
start, _ = dt.last(minutes=look_back)
|
|
||||||
mid, _ = dt.last(minutes=3)
|
|
||||||
|
|
||||||
+ filtered_ts_list = []
|
|
||||||
ts_list = self.data_loader.get_metric(start, mid, kpi.metric)
|
|
||||||
- establish_time = reduce(lambda x, y: x + y, [list(set(_ts.values)) for _ts in ts_list])
|
|
||||||
+ for _ts in ts_list:
|
|
||||||
+ if sum(_ts.values) > 0:
|
|
||||||
+ filtered_ts_list.append(_ts)
|
|
||||||
+
|
|
||||||
+ establish_time = reduce(lambda x, y: x + y, [list(set(_ts.values)) for _ts in filtered_ts_list])
|
|
||||||
|
|
||||||
self.mean = np.mean(establish_time)
|
|
||||||
self.std = np.std(establish_time)
|
|
||||||
@@ -65,6 +70,7 @@ class TcpEstablishNSigmaDetector(Detector):
|
|
||||||
"""Detects kpi based on signal time series anomaly detection model"""
|
|
||||||
outlier_ratio_th = kpi.params.get('outlier_ratio_th')
|
|
||||||
look_back = kpi.params.get('obs_size')
|
|
||||||
+ min_rtt = kpi.params.get('min_rtt')
|
|
||||||
|
|
||||||
start, end = dt.last(minutes=look_back)
|
|
||||||
ts_list = self.data_loader.\
|
|
||||||
@@ -72,9 +78,9 @@ class TcpEstablishNSigmaDetector(Detector):
|
|
||||||
|
|
||||||
anomalies = []
|
|
||||||
for _ts in ts_list:
|
|
||||||
- outlier = [val for val in _ts.values if abs(val - self.mean) > 3 * self.std]
|
|
||||||
+ outlier = [val for val in _ts.values if val > self.mean + 5 * self.std]
|
|
||||||
ratio = divide(len(outlier), len(_ts.values))
|
|
||||||
- if outlier and ratio > outlier_ratio_th:
|
|
||||||
+ if outlier and ratio > outlier_ratio_th and np.average(outlier) >= min_rtt:
|
|
||||||
anomalies.append(
|
|
||||||
Anomaly(
|
|
||||||
machine_id=machine_id,
|
|
||||||
diff --git a/anteater/model/detector/th_base_detector.py b/anteater/model/detector/th_base_detector.py
|
|
||||||
new file mode 100644
|
|
||||||
index 0000000..bec9705
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/anteater/model/detector/th_base_detector.py
|
|
||||||
@@ -0,0 +1,66 @@
|
|
||||||
+#!/usr/bin/python3
|
|
||||||
+# ******************************************************************************
|
|
||||||
+# Copyright (c) 2022 Huawei Technologies Co., Ltd.
|
|
||||||
+# gala-anteater is licensed under Mulan PSL v2.
|
|
||||||
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
|
|
||||||
+# You may obtain a copy of Mulan PSL v2 at:
|
|
||||||
+# http://license.coscl.org.cn/MulanPSL2
|
|
||||||
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
|
||||||
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
|
||||||
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
|
||||||
+# See the Mulan PSL v2 for more details.
|
|
||||||
+# ******************************************************************************/
|
|
||||||
+
|
|
||||||
+from typing import List
|
|
||||||
+
|
|
||||||
+from anteater.core.anomaly import Anomaly
|
|
||||||
+from anteater.core.kpi import KPI
|
|
||||||
+from anteater.model.detector.base import Detector
|
|
||||||
+from anteater.source.metric_loader import MetricLoader
|
|
||||||
+from anteater.utils.datetime import DateTimeManager as dt
|
|
||||||
+from anteater.utils.log import logger
|
|
||||||
+
|
|
||||||
+
|
|
||||||
+class ThBaseDetector(Detector):
|
|
||||||
+ """The threshold-based anomaly detector"""
|
|
||||||
+
|
|
||||||
+ def __init__(self, data_loader: MetricLoader):
|
|
||||||
+ """The threshold-based detector initializer"""
|
|
||||||
+ super().__init__(data_loader)
|
|
||||||
+
|
|
||||||
+ def detect_kpis(self, kpis: List[KPI]):
|
|
||||||
+ """Executes anomaly detection on kpis"""
|
|
||||||
+ start, end = dt.last(minutes=1)
|
|
||||||
+ machine_ids = self.get_unique_machine_id(start, end, kpis)
|
|
||||||
+ anomalies = []
|
|
||||||
+ for _id in machine_ids:
|
|
||||||
+ for kpi in kpis:
|
|
||||||
+ anomalies.extend(self.detect_signal_kpi(kpi, _id))
|
|
||||||
+
|
|
||||||
+ return anomalies
|
|
||||||
+
|
|
||||||
+ def detect_signal_kpi(self, kpi, machine_id: str) -> List[Anomaly]:
|
|
||||||
+ """Detects kpi based on threshold based anomaly detection model"""
|
|
||||||
+ look_back = kpi.params.get('look_back')
|
|
||||||
+ th = kpi.params.get('th')
|
|
||||||
+ start, end = dt.last(minutes=look_back)
|
|
||||||
+ ts_list = self.data_loader.\
|
|
||||||
+ get_metric(start, end, kpi.metric, label_name='machine_id', label_value=machine_id)
|
|
||||||
+
|
|
||||||
+ if not ts_list:
|
|
||||||
+ logger.warning(f'Key metric {kpi.metric} is null on the target machine {machine_id}!')
|
|
||||||
+ return []
|
|
||||||
+
|
|
||||||
+ anomalies = [
|
|
||||||
+ Anomaly(
|
|
||||||
+ machine_id=machine_id,
|
|
||||||
+ metric=_ts.metric,
|
|
||||||
+ labels=_ts.labels,
|
|
||||||
+ score=1,
|
|
||||||
+ entity_name=kpi.entity_name,
|
|
||||||
+ description=kpi.description)
|
|
||||||
+ for _ts in ts_list
|
|
||||||
+ if sum(_ts.values) >= th
|
|
||||||
+ ]
|
|
||||||
+
|
|
||||||
+ return anomalies
|
|
||||||
diff --git a/anteater/module/sys/nic_loss.py b/anteater/module/sys/nic_loss.py
|
|
||||||
new file mode 100644
|
|
||||||
index 0000000..d24e06f
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/anteater/module/sys/nic_loss.py
|
|
||||||
@@ -0,0 +1,59 @@
|
|
||||||
+#!/usr/bin/python3
|
|
||||||
+# ******************************************************************************
|
|
||||||
+# Copyright (c) 2022 Huawei Technologies Co., Ltd.
|
|
||||||
+# gala-anteater is licensed under Mulan PSL v2.
|
|
||||||
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
|
|
||||||
+# You may obtain a copy of Mulan PSL v2 at:
|
|
||||||
+# http://license.coscl.org.cn/MulanPSL2
|
|
||||||
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
|
||||||
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
|
||||||
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
|
||||||
+# See the Mulan PSL v2 for more details.
|
|
||||||
+# ******************************************************************************/
|
|
||||||
+
|
|
||||||
+from typing import List, Dict
|
|
||||||
+
|
|
||||||
+from anteater.core.anomaly import Anomaly
|
|
||||||
+from anteater.model.detector.th_base_detector import ThBaseDetector
|
|
||||||
+from anteater.module.base import E2EDetector
|
|
||||||
+from anteater.source.anomaly_report import AnomalyReport
|
|
||||||
+from anteater.source.metric_loader import MetricLoader
|
|
||||||
+from anteater.template.sys_anomaly_template import SysAnomalyTemplate
|
|
||||||
+
|
|
||||||
+
|
|
||||||
+class NICLossDetector(E2EDetector):
|
|
||||||
+ """SYS nic loss e2e detector which detects the network loss.
|
|
||||||
+ """
|
|
||||||
+
|
|
||||||
+ config_file = 'sys_nic_loss.json'
|
|
||||||
+
|
|
||||||
+ def __init__(self, data_loader: MetricLoader, reporter: AnomalyReport):
|
|
||||||
+ """The system tcp transmission latency e2e detector initializer"""
|
|
||||||
+ super().__init__(reporter, SysAnomalyTemplate)
|
|
||||||
+
|
|
||||||
+ self.detectors = [
|
|
||||||
+ ThBaseDetector(data_loader)
|
|
||||||
+ ]
|
|
||||||
+
|
|
||||||
+ def parse_cause_metrics(self, anomaly: Anomaly) -> List[Dict]:
|
|
||||||
+ """Parses the cause metrics into the specific formats"""
|
|
||||||
+ cause_metrics = []
|
|
||||||
+ for _cs in anomaly.root_causes:
|
|
||||||
+ tmp = {
|
|
||||||
+ 'metric': _cs.ts.metric,
|
|
||||||
+ 'labels': _cs.ts.labels,
|
|
||||||
+ 'score': _cs.score,
|
|
||||||
+ }
|
|
||||||
+ if 'tcp' in _cs.ts.metric:
|
|
||||||
+ tmp['description'] = _cs.description.format(
|
|
||||||
+ _cs.ts.labels.get('tgid', ''),
|
|
||||||
+ _cs.ts.labels.get('client_port', ''),
|
|
||||||
+ _cs.ts.labels.get('server_ip', ''),
|
|
||||||
+ _cs.ts.labels.get('server_port', ''))
|
|
||||||
+ else:
|
|
||||||
+ tmp['description'] = _cs.description.format(
|
|
||||||
+ _cs.ts.labels.get('dev_name', ''))
|
|
||||||
+
|
|
||||||
+ cause_metrics.append(tmp)
|
|
||||||
+
|
|
||||||
+ return cause_metrics
|
|
||||||
diff --git a/anteater/module/sys/proc_io_latency.py b/anteater/module/sys/proc_io_latency.py
|
|
||||||
index 43e069f..a34c48d 100644
|
|
||||||
--- a/anteater/module/sys/proc_io_latency.py
|
|
||||||
+++ b/anteater/module/sys/proc_io_latency.py
|
|
||||||
@@ -38,12 +38,12 @@ class ProcIOLatencyDetector(E2EDetector):
|
|
||||||
def init_detectors(self, data_loader):
|
|
||||||
if self.job_config.model_config.enable:
|
|
||||||
detectors = [
|
|
||||||
- NSigmaDetector(data_loader, method='abs'),
|
|
||||||
+ NSigmaDetector(data_loader, method='max'),
|
|
||||||
OnlineVAEDetector(data_loader, self.job_config.model_config)
|
|
||||||
]
|
|
||||||
else:
|
|
||||||
detectors = [
|
|
||||||
- NSigmaDetector(data_loader, method='abs')
|
|
||||||
+ NSigmaDetector(data_loader, method='max')
|
|
||||||
]
|
|
||||||
|
|
||||||
return detectors
|
|
||||||
diff --git a/anteater/template/app_anomaly_template.py b/anteater/template/app_anomaly_template.py
|
|
||||||
index a509c96..4df4a35 100644
|
|
||||||
--- a/anteater/template/app_anomaly_template.py
|
|
||||||
+++ b/anteater/template/app_anomaly_template.py
|
|
||||||
@@ -46,6 +46,8 @@ class AppAnomalyTemplate(Template):
|
|
||||||
'SeverityNumber': 13,
|
|
||||||
'Body': f'{self.timestamp.strftime("%c")} WARN, APP may be impacting sli performance issues.',
|
|
||||||
'event_id': f'{timestamp}_{self.entity_id}',
|
|
||||||
+ "keywords": self.keywords,
|
|
||||||
+ 'cause_metrics': self.cause_metrics[0] if self.cause_metrics else {'description': 'Unknown'}
|
|
||||||
}
|
|
||||||
|
|
||||||
return result
|
|
||||||
diff --git a/anteater/template/sys_anomaly_template.py b/anteater/template/sys_anomaly_template.py
|
|
||||||
index 4ac6abb..aec6ea0 100644
|
|
||||||
--- a/anteater/template/sys_anomaly_template.py
|
|
||||||
+++ b/anteater/template/sys_anomaly_template.py
|
|
||||||
@@ -46,6 +46,7 @@ class SysAnomalyTemplate(Template):
|
|
||||||
'SeverityNumber': 13,
|
|
||||||
'Body': f'{self.timestamp.strftime("%c")} WARN, SYS may be impacting performance issues.',
|
|
||||||
'event_id': f'{timestamp}_{self.entity_id}',
|
|
||||||
+ "keywords": self.keywords
|
|
||||||
}
|
|
||||||
|
|
||||||
return result
|
|
||||||
diff --git a/config/module/sys_nic_loss.json b/config/module/sys_nic_loss.json
|
|
||||||
new file mode 100644
|
|
||||||
index 0000000..793f82f
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/config/module/sys_nic_loss.json
|
|
||||||
@@ -0,0 +1,53 @@
|
|
||||||
+{
|
|
||||||
+ "name": "sys_tcp_transmission_latency",
|
|
||||||
+ "job_type": "sys",
|
|
||||||
+ "keywords": [
|
|
||||||
+ "net"
|
|
||||||
+ ],
|
|
||||||
+ "root_cause_number": 3,
|
|
||||||
+ "KPI": [
|
|
||||||
+ {
|
|
||||||
+ "metric": "gala_gopher_nic_tc_sent_drop",
|
|
||||||
+ "kpi_type": "",
|
|
||||||
+ "entity_name": "nic",
|
|
||||||
+ "enable": true,
|
|
||||||
+ "description": "TC发送丢包数异常",
|
|
||||||
+ "params": {
|
|
||||||
+ "look_back": 2,
|
|
||||||
+ "th": 1
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ ],
|
|
||||||
+ "Features": [
|
|
||||||
+ {
|
|
||||||
+ "metric": "gala_gopher_nic_tx_dropped",
|
|
||||||
+ "priority": 0,
|
|
||||||
+ "description": "网卡发送丢弃的数据包数异常。(dev_name = {})"
|
|
||||||
+ },
|
|
||||||
+ {
|
|
||||||
+ "metric": "gala_gopher_nic_rx_dropped",
|
|
||||||
+ "priority": 0,
|
|
||||||
+ "description": "网卡接收丢弃的数据包数异常。(dev_name = {})"
|
|
||||||
+ },
|
|
||||||
+ {
|
|
||||||
+ "metric": "gala_gopher_tcp_link_sk_drops",
|
|
||||||
+ "priority": 3,
|
|
||||||
+ "description": "Packets are lost in the host protocol stack due to unknown causes, and the TCP performance deteriorates. (PID ={}, client IP = {}, Server IP = {}, Port = {})"
|
|
||||||
+ },
|
|
||||||
+ {
|
|
||||||
+ "metric": "gala_gopher_tcp_link_retran_packets",
|
|
||||||
+ "priority": 1,
|
|
||||||
+ "description": "TCP retransmission is triggered due to network faults, resulting in TCP performance deterioration. (PID ={}, client IP = {}, Server IP = {}, Port = {})"
|
|
||||||
+ },
|
|
||||||
+ {
|
|
||||||
+ "metric": "gala_gopher_tcp_link_lost_out",
|
|
||||||
+ "priority": 3,
|
|
||||||
+ "description": "The network may be congested, causing abnormal TCP packet loss and performance deterioration. (PID ={}, client IP = {}, Server IP = {}, Port = {})"
|
|
||||||
+ },
|
|
||||||
+ {
|
|
||||||
+ "metric": "gala_gopher_tcp_link_notsent_bytes",
|
|
||||||
+ "priority": 4,
|
|
||||||
+ "description": "Due to network delay or peer application performance, too many packets to be sent are accumulated in the sliding window. As a result, TCP performance deteriorates. (PID = {}, client IP = {}, Server IP = {}, Port = {})"
|
|
||||||
+ }
|
|
||||||
+ ]
|
|
||||||
+}
|
|
||||||
\ No newline at end of file
|
|
||||||
diff --git a/config/module/sys_tcp_establish.json b/config/module/sys_tcp_establish.json
|
|
||||||
index 61ae72d..2c158c0 100644
|
|
||||||
--- a/config/module/sys_tcp_establish.json
|
|
||||||
+++ b/config/module/sys_tcp_establish.json
|
|
||||||
@@ -15,7 +15,8 @@
|
|
||||||
"params": {
|
|
||||||
"look_back": 30,
|
|
||||||
"outlier_ratio_th": 0.5,
|
|
||||||
- "obs_size": 3
|
|
||||||
+ "obs_size": 3,
|
|
||||||
+ "min_rtt": 500000
|
|
||||||
}
|
|
||||||
}
|
|
||||||
],
|
|
||||||
--
|
|
||||||
2.33.0
|
|
||||||
|
|
||||||
@ -1,533 +0,0 @@
|
|||||||
From e0e99ac8fc3de9e8781f5d7acd5e9fe1832461b0 Mon Sep 17 00:00:00 2001
|
|
||||||
From: lizhenxing11 <lizhenxing11@huawei.com>
|
|
||||||
Date: Tue, 3 Jan 2023 15:27:45 +0800
|
|
||||||
Subject: [PATCH] add chinese descriptions
|
|
||||||
|
|
||||||
update description
|
|
||||||
|
|
||||||
fix typo
|
|
||||||
|
|
||||||
update th
|
|
||||||
---
|
|
||||||
anteater/core/kpi.py | 2 +-
|
|
||||||
anteater/template/app_anomaly_template.py | 5 ++-
|
|
||||||
anteater/template/sys_anomaly_template.py | 2 +-
|
|
||||||
anteater/utils/data_load.py | 14 ++++++--
|
|
||||||
config/module/app_sli_rtt.json | 2 ++
|
|
||||||
config/module/disk_throughput.json | 6 ++++
|
|
||||||
config/module/proc_io_latency.json | 31 +++++++++++-----
|
|
||||||
config/module/sys_io_latency.json | 25 ++++++++-----
|
|
||||||
config/module/sys_nic_loss.json | 21 +++++++----
|
|
||||||
config/module/sys_tcp_establish.json | 4 ++-
|
|
||||||
.../module/sys_tcp_transmission_latency.json | 36 ++++++++++++-------
|
|
||||||
11 files changed, 104 insertions(+), 44 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/anteater/core/kpi.py b/anteater/core/kpi.py
|
|
||||||
index 3480139..f83b666 100644
|
|
||||||
--- a/anteater/core/kpi.py
|
|
||||||
+++ b/anteater/core/kpi.py
|
|
||||||
@@ -23,7 +23,7 @@ class KPI:
|
|
||||||
kpi_type: str
|
|
||||||
entity_name: str
|
|
||||||
enable: bool
|
|
||||||
- description: str = ""
|
|
||||||
+ description: str
|
|
||||||
params: dict = field(default=dict)
|
|
||||||
atrend: AnomalyTrend = AnomalyTrend.DEFAULT
|
|
||||||
|
|
||||||
diff --git a/anteater/template/app_anomaly_template.py b/anteater/template/app_anomaly_template.py
|
|
||||||
index 4df4a35..3770d2e 100644
|
|
||||||
--- a/anteater/template/app_anomaly_template.py
|
|
||||||
+++ b/anteater/template/app_anomaly_template.py
|
|
||||||
@@ -33,7 +33,7 @@ class AppAnomalyTemplate(Template):
|
|
||||||
'event_type': 'app',
|
|
||||||
'event_source': 'gala-anteater',
|
|
||||||
'keywords': self.keywords,
|
|
||||||
- 'cause_metric': self.cause_metrics[0] if self.cause_metrics else {'description': 'Unknown'}
|
|
||||||
+ 'cause_metric': self.cause_metrics[0] if self.cause_metrics else {'description': self.description}
|
|
||||||
},
|
|
||||||
'Resource': {
|
|
||||||
'metric': self.metric,
|
|
||||||
@@ -46,8 +46,7 @@ class AppAnomalyTemplate(Template):
|
|
||||||
'SeverityNumber': 13,
|
|
||||||
'Body': f'{self.timestamp.strftime("%c")} WARN, APP may be impacting sli performance issues.',
|
|
||||||
'event_id': f'{timestamp}_{self.entity_id}',
|
|
||||||
- "keywords": self.keywords,
|
|
||||||
- 'cause_metrics': self.cause_metrics[0] if self.cause_metrics else {'description': 'Unknown'}
|
|
||||||
+ "keywords": self.keywords
|
|
||||||
}
|
|
||||||
|
|
||||||
return result
|
|
||||||
diff --git a/anteater/template/sys_anomaly_template.py b/anteater/template/sys_anomaly_template.py
|
|
||||||
index aec6ea0..d3c7e82 100644
|
|
||||||
--- a/anteater/template/sys_anomaly_template.py
|
|
||||||
+++ b/anteater/template/sys_anomaly_template.py
|
|
||||||
@@ -33,7 +33,7 @@ class SysAnomalyTemplate(Template):
|
|
||||||
'event_type': 'sys',
|
|
||||||
'event_source': 'gala-anteater',
|
|
||||||
'keywords': self.keywords,
|
|
||||||
- 'cause_metric': self.cause_metrics[0] if self.cause_metrics else {'description': 'Unknown'}
|
|
||||||
+ 'cause_metric': self.cause_metrics[0] if self.cause_metrics else {'description': self.description}
|
|
||||||
},
|
|
||||||
'Resource': {
|
|
||||||
'metric': self.metric,
|
|
||||||
diff --git a/anteater/utils/data_load.py b/anteater/utils/data_load.py
|
|
||||||
index b6991c6..730c9c6 100644
|
|
||||||
--- a/anteater/utils/data_load.py
|
|
||||||
+++ b/anteater/utils/data_load.py
|
|
||||||
@@ -47,8 +47,9 @@ def load_job_config(file_name) -> JobConfig:
|
|
||||||
job_type = config['job_type']
|
|
||||||
keywords = config['keywords']
|
|
||||||
root_cause_number = config['root_cause_number']
|
|
||||||
- kpis = [KPI(**_conf) for _conf in config['KPI']]
|
|
||||||
- features = [Feature(**_conf) for _conf in config['Features']]
|
|
||||||
+
|
|
||||||
+ kpis = [KPI(**update_description(_conf)) for _conf in config['KPI']]
|
|
||||||
+ features = [Feature(**update_description(_conf)) for _conf in config['Features']]
|
|
||||||
|
|
||||||
model_config = None
|
|
||||||
if 'OnlineModel' in config:
|
|
||||||
@@ -81,3 +82,12 @@ def load_job_config(file_name) -> JobConfig:
|
|
||||||
features=features,
|
|
||||||
model_config=model_config
|
|
||||||
)
|
|
||||||
+
|
|
||||||
+
|
|
||||||
+def update_description(conf: dict):
|
|
||||||
+ """Changes description to zh"""
|
|
||||||
+ if 'description-zh' in conf:
|
|
||||||
+ conf['description'] = conf['description-zh']
|
|
||||||
+ del conf['description-zh']
|
|
||||||
+
|
|
||||||
+ return conf
|
|
||||||
diff --git a/config/module/app_sli_rtt.json b/config/module/app_sli_rtt.json
|
|
||||||
index db29392..0146883 100644
|
|
||||||
--- a/config/module/app_sli_rtt.json
|
|
||||||
+++ b/config/module/app_sli_rtt.json
|
|
||||||
@@ -12,6 +12,7 @@
|
|
||||||
"entity_name": "sli",
|
|
||||||
"enable": false,
|
|
||||||
"description": "sli rtt 异常",
|
|
||||||
+ "description-zh": "应用级请求往返时延(RTT)异常",
|
|
||||||
"params": {
|
|
||||||
"look_back": 10,
|
|
||||||
"obs_size": 25,
|
|
||||||
@@ -28,6 +29,7 @@
|
|
||||||
"entity_name": "sli",
|
|
||||||
"enable": true,
|
|
||||||
"description": "sli tps 异常",
|
|
||||||
+ "description-zh": "应用级请求吞吐量(TPS)异常",
|
|
||||||
"params": {
|
|
||||||
"look_back": 10,
|
|
||||||
"obs_size": 25,
|
|
||||||
diff --git a/config/module/disk_throughput.json b/config/module/disk_throughput.json
|
|
||||||
index 00276c0..f6244f6 100644
|
|
||||||
--- a/config/module/disk_throughput.json
|
|
||||||
+++ b/config/module/disk_throughput.json
|
|
||||||
@@ -12,6 +12,7 @@
|
|
||||||
"entity_name": "disk",
|
|
||||||
"enable": true,
|
|
||||||
"description": "Disk read await time is increasing!",
|
|
||||||
+ "description-zh": "磁盘读响应时间升高,性能发生劣化",
|
|
||||||
"params": {
|
|
||||||
"look_back": 20,
|
|
||||||
"obs_size": 25,
|
|
||||||
@@ -28,6 +29,7 @@
|
|
||||||
"entity_name": "disk",
|
|
||||||
"enable": true,
|
|
||||||
"description": "Disk write await time is increasing!",
|
|
||||||
+ "description-zh": "磁盘写响应时间升高,性能发生劣化",
|
|
||||||
"params": {
|
|
||||||
"look_back": 20,
|
|
||||||
"obs_size": 25,
|
|
||||||
@@ -68,24 +70,28 @@
|
|
||||||
"metric": "gala_gopher_disk_rspeed_kB",
|
|
||||||
"priority": 0,
|
|
||||||
"description": "The disk I/O await time performance deteriorates due to read throughput rise (read kbytes/second).(Disk = {})",
|
|
||||||
+ "description-zh": "磁盘读吞吐量异常升高,导致I/O等待时间性能劣化(Disk = {})",
|
|
||||||
"atrend": "rise"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_disk_wspeed_kB",
|
|
||||||
"priority": 0,
|
|
||||||
"description": "The disk I/O await time performance deteriorates due to write throughput rise (write kbytes/second).(Disk = {})",
|
|
||||||
+ "description-zh": "磁盘写吞吐量异常升高,导致I/O等待时间性能劣化(Disk = {})",
|
|
||||||
"atrend": "rise"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_disk_rareq",
|
|
||||||
"priority": 0,
|
|
||||||
"description": "The disk I/O await time performance deteriorates due to read saturation rise.(Disk = {})",
|
|
||||||
+ "description-zh": "磁盘读饱和度量异常升高,导致I/O等待时间性能劣化(Disk = {})",
|
|
||||||
"atrend": "rise"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_disk_wareq",
|
|
||||||
"priority": 0,
|
|
||||||
"description": "The disk I/O await time performance deteriorates due to write saturation rise.(Disk = {})",
|
|
||||||
+ "description-zh": "磁盘读写饱和度量异常升高,导致I/O等待时间性能劣化(Disk = {})",
|
|
||||||
"atrend": "rise"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
diff --git a/config/module/proc_io_latency.json b/config/module/proc_io_latency.json
|
|
||||||
index c6c03c1..f086b87 100644
|
|
||||||
--- a/config/module/proc_io_latency.json
|
|
||||||
+++ b/config/module/proc_io_latency.json
|
|
||||||
@@ -12,6 +12,7 @@
|
|
||||||
"entity_name": "proc",
|
|
||||||
"enable": true,
|
|
||||||
"description": "I/O operation delay at the BIO layer (unit: us)",
|
|
||||||
+ "description-zh": "BIO层I/O操作延时高(单位:us)",
|
|
||||||
"params": {
|
|
||||||
"look_back": 20,
|
|
||||||
"obs_size": 37,
|
|
||||||
@@ -28,6 +29,7 @@
|
|
||||||
"entity_name": "proc",
|
|
||||||
"enable": true,
|
|
||||||
"description": "Number of small I/O (less than 4 KB) read operations at the BIO layer.",
|
|
||||||
+ "description-zh": "BIO层小数据I/O读操作数量异常(小于4KB)",
|
|
||||||
"params": {
|
|
||||||
"look_back": 20,
|
|
||||||
"obs_size": 25,
|
|
||||||
@@ -44,6 +46,7 @@
|
|
||||||
"entity_name": "proc",
|
|
||||||
"enable": true,
|
|
||||||
"description": "Number of small I/O (less than 4 KB) write operations at the BIO layer.",
|
|
||||||
+ "description-zh": "BIO层小数据I/O写操作数量异常(小于4KB)",
|
|
||||||
"params": {
|
|
||||||
"look_back": 20,
|
|
||||||
"obs_size": 25,
|
|
||||||
@@ -61,6 +64,7 @@
|
|
||||||
"entity_name": "proc",
|
|
||||||
"enable": true,
|
|
||||||
"description": "Number of big I/O (greater than 4 KB) read operations at the BIO layer.",
|
|
||||||
+ "description-zh": "BIO层大数据I/O读操作数量异常(大于4KB)",
|
|
||||||
"params": {
|
|
||||||
"look_back": 20,
|
|
||||||
"obs_size": 25,
|
|
||||||
@@ -76,7 +80,8 @@
|
|
||||||
"kpi_type": "",
|
|
||||||
"entity_name": "proc",
|
|
||||||
"enable": true,
|
|
||||||
- "description": "Number of big I/O (greater than 4 KB) read operations at the BIO layer.",
|
|
||||||
+ "description": "Number of big I/O (greater than 4 KB) write operations at the BIO layer.",
|
|
||||||
+ "description-zh": "BIO层大数据写操作数量异常(大于4KB)",
|
|
||||||
"params": {
|
|
||||||
"look_back": 20,
|
|
||||||
"obs_size": 25,
|
|
||||||
@@ -116,42 +121,50 @@
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_block_latency_req_max",
|
|
||||||
"priority": 4,
|
|
||||||
- "description": "The system I/O performance deteriorates due to a drive failure.(Disk = {})"
|
|
||||||
+ "description": "Process I/O performance deteriorates due to system I/O bandwidth insufficient.(Disk = {})",
|
|
||||||
+ "description-zh": "系统I/O带宽不足引起进程I/O性能劣化(Disk={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_block_latency_device_max",
|
|
||||||
"priority": 3,
|
|
||||||
- "description": "Degraded system I/O performance due to device (disk) failure.(Disk = {})"
|
|
||||||
+ "description": "Process I/O performance deteriorates due to device I/O bandwidth insufficient.(Disk = {})",
|
|
||||||
+ "description-zh": "设备I/O带宽不足引起进程I/O性能劣化(Disk={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_block_read_bytes",
|
|
||||||
"priority": 2,
|
|
||||||
- "description": "System performance deteriorates due to frequent read I/O operations.(Disk = {})"
|
|
||||||
+ "description": "Process I/O performance deteriorates due to frequent read I/O operations.(Disk = {})",
|
|
||||||
+ "description-zh": "频繁I/O读操作引起进程I/O性能劣化(Disk={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_block_write_bytes",
|
|
||||||
"priority": 2,
|
|
||||||
- "description": "System performance deteriorates due to frequent write I/O operations.(Disk = {})"
|
|
||||||
+ "description": "Process I/O performance deteriorates due to frequent write I/O operations.(Disk = {})",
|
|
||||||
+ "description-zh": "频繁写操作引起进程I/O性能劣化(Disk={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_proc_less_4k_io_read",
|
|
||||||
"priority": 0,
|
|
||||||
- "description": "System performance degrades due to frequent small I/O read operations.(Disk = {}, PID = {}, comm = {})"
|
|
||||||
+ "description": "Process I/O performance degrades due to frequent small I/O read operations.(Disk = {}, PID = {}, comm = {})",
|
|
||||||
+ "description-zh": "频繁小数据量(小于4KB)读操作引起进程I/O性能劣化(Disk={},PID={},comm={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_proc_less_4k_io_write",
|
|
||||||
"priority": 0,
|
|
||||||
- "description": "System performance degrades due to frequent small I/O write operations.(Disk = {}, PID = {}, comm = {})"
|
|
||||||
+ "description": "Process I/O performance degrades due to frequent small I/O write operations.(Disk = {}, PID = {}, comm = {})",
|
|
||||||
+ "description-zh": "频繁小数据量(小于4KB)写操作引起进程I/O性能劣化(Disk={},PID={},comm={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_proc_greater_4k_io_read",
|
|
||||||
"priority": 1,
|
|
||||||
- "description": "System performance degrades due to frequent big I/O read operations.(Disk = {}, PID = {}, comm = {})"
|
|
||||||
+ "description": "Process I/O performance degrades due to frequent big I/O read operations.(Disk = {}, PID = {}, comm = {})",
|
|
||||||
+ "description-zh": "频繁大数据量(大于4KB)读操作引起进程I/O性能劣化(Disk={},PID={},comm={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_proc_greater_4k_io_write",
|
|
||||||
"priority": 1,
|
|
||||||
- "description": "System performance degrades due to frequent big I/O write operations.(Disk = {}, PID = {}, comm = {})"
|
|
||||||
+ "description": "Process I/O performance degrades due to frequent big I/O write operations.(Disk = {}, PID = {}, comm = {})",
|
|
||||||
+ "description-zh": "频繁大数据量(大于4KB)写操作引起进程I/O性能劣化(Disk={},PID={},comm={})"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
\ No newline at end of file
|
|
||||||
diff --git a/config/module/sys_io_latency.json b/config/module/sys_io_latency.json
|
|
||||||
index e58990d..bdf17d3 100644
|
|
||||||
--- a/config/module/sys_io_latency.json
|
|
||||||
+++ b/config/module/sys_io_latency.json
|
|
||||||
@@ -12,6 +12,7 @@
|
|
||||||
"entity_name": "block",
|
|
||||||
"enable": true,
|
|
||||||
"description": "Block I/O latency performance is deteriorating!",
|
|
||||||
+ "description-zh": "Block层I/O操作时延性能劣化",
|
|
||||||
"params": {
|
|
||||||
"look_back": 20,
|
|
||||||
"obs_size": 25,
|
|
||||||
@@ -51,42 +52,50 @@
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_block_latency_driver_max",
|
|
||||||
"priority": 4,
|
|
||||||
- "description": "The system I/O performance deteriorates due to a drive failure.(Disk = {})"
|
|
||||||
+ "description": "The system I/O performance deteriorates due to a drive failure.(Disk = {})",
|
|
||||||
+ "description-zh": "驱动异常引起系统I/O性能劣化(Disk={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_block_latency_device_max",
|
|
||||||
"priority": 3,
|
|
||||||
- "description": "Degraded system I/O performance due to device (disk) failure.(Disk = {})"
|
|
||||||
+ "description": "Degraded system I/O performance due to device (disk) failure.(Disk = {})",
|
|
||||||
+ "description-zh": "设备(磁盘)异常引起系统I/O性能劣化(Disk={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_block_read_bytes",
|
|
||||||
"priority": 2,
|
|
||||||
- "description": "System performance deteriorates due to frequent read I/O operations.(Disk = {})"
|
|
||||||
+ "description": "System performance deteriorates due to frequent read I/O operations.(Disk = {})",
|
|
||||||
+ "description-zh": "频繁读操作引起系统I/O性能劣化(Disk={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_block_write_bytes",
|
|
||||||
"priority": 2,
|
|
||||||
- "description": "System performance deteriorates due to frequent write I/O operations.(Disk = {})"
|
|
||||||
+ "description": "System performance deteriorates due to frequent write I/O operations.(Disk = {})",
|
|
||||||
+ "description-zh": "频繁写操作引起系统I/O性能劣化(Disk={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_proc_less_4k_io_read",
|
|
||||||
"priority": 0,
|
|
||||||
- "description": "System performance degrades due to frequent small I/O read operations.(Disk = {}, PID = {}, comm = {})"
|
|
||||||
+ "description": "System performance degrades due to frequent small I/O read operations.(Disk = {}, PID = {}, comm = {})",
|
|
||||||
+ "description-zh": "频繁小数据量(小于4KB)读操作引起系统I/O性能劣化(Disk={},PID={},comm={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_proc_less_4k_io_write",
|
|
||||||
"priority": 0,
|
|
||||||
- "description": "System performance degrades due to frequent small I/O write operations.(Disk = {}, PID = {}, comm = {})"
|
|
||||||
+ "description": "System performance degrades due to frequent small I/O write operations.(Disk = {}, PID = {}, comm = {})",
|
|
||||||
+ "description-zh": "频繁小数据量(小于4KB)写操作引起系统I/O性能劣化(Disk={},PID={},comm={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_proc_greater_4k_io_read",
|
|
||||||
"priority": 1,
|
|
||||||
- "description": "System performance degrades due to frequent big I/O read operations.(Disk = {}, PID = {}, comm = {})"
|
|
||||||
+ "description": "System performance degrades due to frequent big I/O read operations.(Disk = {}, PID = {}, comm = {})",
|
|
||||||
+ "description-zh": "频繁大数据量(大于4KB)读操作引起系统I/O性能劣化(Disk={},PID={},comm={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_proc_greater_4k_io_write",
|
|
||||||
"priority": 1,
|
|
||||||
- "description": "System performance degrades due to frequent big I/O write operations.(Disk = {}, PID = {}, comm = {})"
|
|
||||||
+ "description": "System performance degrades due to frequent big I/O write operations.(Disk = {}, PID = {}, comm = {})",
|
|
||||||
+ "description-zh": "频繁大数据量(大于4KB)写操作引起系统I/O性能劣化(Disk={},PID={},comm={})"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
\ No newline at end of file
|
|
||||||
diff --git a/config/module/sys_nic_loss.json b/config/module/sys_nic_loss.json
|
|
||||||
index 793f82f..8a1feb8 100644
|
|
||||||
--- a/config/module/sys_nic_loss.json
|
|
||||||
+++ b/config/module/sys_nic_loss.json
|
|
||||||
@@ -11,7 +11,8 @@
|
|
||||||
"kpi_type": "",
|
|
||||||
"entity_name": "nic",
|
|
||||||
"enable": true,
|
|
||||||
- "description": "TC发送丢包数异常",
|
|
||||||
+ "description": "TC sent dropped packets",
|
|
||||||
+ "description-zh": "TC发送丢包数异常",
|
|
||||||
"params": {
|
|
||||||
"look_back": 2,
|
|
||||||
"th": 1
|
|
||||||
@@ -22,32 +23,38 @@
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_nic_tx_dropped",
|
|
||||||
"priority": 0,
|
|
||||||
- "description": "网卡发送丢弃的数据包数异常。(dev_name = {})"
|
|
||||||
+ "description": "The number of lost packets sent by the nic card are increasing and the NIC performance deteriorates.(dev_name = {})",
|
|
||||||
+ "description-zh": "网卡发送丢弃的数据包数增加,导致网卡性能劣化(dev_name={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_nic_rx_dropped",
|
|
||||||
"priority": 0,
|
|
||||||
- "description": "网卡接收丢弃的数据包数异常。(dev_name = {})"
|
|
||||||
+ "description": "The number of lost packets received by the nic card are increasing and the NIC performance deteriorates.(dev_name = {})",
|
|
||||||
+ "description-zh": "网卡接收丢弃的数据包数增加,导致网卡性能劣化(dev_name={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_tcp_link_sk_drops",
|
|
||||||
"priority": 3,
|
|
||||||
- "description": "Packets are lost in the host protocol stack due to unknown causes, and the TCP performance deteriorates. (PID ={}, client IP = {}, Server IP = {}, Port = {})"
|
|
||||||
+ "description": "Packets are lost in the host protocol stack due to unknown causes, and the NIC performance deteriorates. (PID = {}, client IP = {}, Server IP = {}, Port = {})",
|
|
||||||
+ "description-zh": "由于未知原因,数据包在主机协议栈中丢失,导致网卡性能劣化(PID={},client IP={},Server IP={},Port={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_tcp_link_retran_packets",
|
|
||||||
"priority": 1,
|
|
||||||
- "description": "TCP retransmission is triggered due to network faults, resulting in TCP performance deterioration. (PID ={}, client IP = {}, Server IP = {}, Port = {})"
|
|
||||||
+ "description": "TCP retransmission is triggered due to network faults, resulting in the NIC performance deterioration. (PID ={}, client IP = {}, Server IP = {}, Port = {})",
|
|
||||||
+ "description-zh": "网络故障触发TCP重传,导致网卡性能下降(PID={},client IP={},Server IP={},Port={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_tcp_link_lost_out",
|
|
||||||
"priority": 3,
|
|
||||||
- "description": "The network may be congested, causing abnormal TCP packet loss and performance deterioration. (PID ={}, client IP = {}, Server IP = {}, Port = {})"
|
|
||||||
+ "description": "The network may be congested, causing abnormal NIC packet loss and performance deterioration. (PID ={}, client IP = {}, Server IP = {}, Port = {})",
|
|
||||||
+ "description-zh": "网络拥塞,导致网卡异常丢包,性能劣化(PID={},client IP={},Server IP={},Port={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_tcp_link_notsent_bytes",
|
|
||||||
"priority": 4,
|
|
||||||
- "description": "Due to network delay or peer application performance, too many packets to be sent are accumulated in the sliding window. As a result, TCP performance deteriorates. (PID = {}, client IP = {}, Server IP = {}, Port = {})"
|
|
||||||
+ "description": "Due to network delay or peer application performance, too many packets to be sent are accumulated in the sliding window. As a result, TCP performance deteriorates. (PID = {}, client IP = {}, Server IP = {}, Port = {})",
|
|
||||||
+ "description-zh": "由于网络延迟或对端应用程序性能,滑动窗口中累积了太多要发送的数据包,导致网卡性能劣化(PID={},client IP={},Server IP={},Port={})"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
\ No newline at end of file
|
|
||||||
diff --git a/config/module/sys_tcp_establish.json b/config/module/sys_tcp_establish.json
|
|
||||||
index 2c158c0..7cd2369 100644
|
|
||||||
--- a/config/module/sys_tcp_establish.json
|
|
||||||
+++ b/config/module/sys_tcp_establish.json
|
|
||||||
@@ -12,6 +12,7 @@
|
|
||||||
"entity_name": "tcp_link",
|
|
||||||
"enable": true,
|
|
||||||
"description": "RTT of syn packet(us): the max syn packets rtt is {:.0f} us",
|
|
||||||
+ "description-zh": "SYN数据包时延异常:最大SYN数据包时延为:{:.0f}us。",
|
|
||||||
"params": {
|
|
||||||
"look_back": 30,
|
|
||||||
"outlier_ratio_th": 0.5,
|
|
||||||
@@ -24,7 +25,8 @@
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_endpoint_retran_synacks",
|
|
||||||
"priority": 0,
|
|
||||||
- "description": "TCP established performance deteriorates due to loss of SYN/ACK packets.(PID = {}, TCP Listen Port = {})"
|
|
||||||
+ "description": "TCP established performance deteriorates due to loss of SYN/ACK packets.(PID = {}, TCP Listen Port = {})",
|
|
||||||
+ "description-zh": "由于SYN/ACK数据包丢失,TCP建链性能劣化(PID={},TCP Listen Port={})"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
\ No newline at end of file
|
|
||||||
diff --git a/config/module/sys_tcp_transmission_latency.json b/config/module/sys_tcp_transmission_latency.json
|
|
||||||
index d9e7f80..0527487 100644
|
|
||||||
--- a/config/module/sys_tcp_transmission_latency.json
|
|
||||||
+++ b/config/module/sys_tcp_transmission_latency.json
|
|
||||||
@@ -12,10 +12,11 @@
|
|
||||||
"entity_name": "tcp_link",
|
|
||||||
"enable": true,
|
|
||||||
"description": "Smoothed Round Trip Time(us)",
|
|
||||||
+ "description-zh": "TCP链接往返时延异常,性能劣化",
|
|
||||||
"params": {
|
|
||||||
"look_back": 20,
|
|
||||||
"obs_size": 25,
|
|
||||||
- "n": 4,
|
|
||||||
+ "n": 3,
|
|
||||||
"outlier_ratio_th": 0.4,
|
|
||||||
"smooth_params": {
|
|
||||||
"method": "conv_smooth",
|
|
||||||
@@ -52,57 +53,68 @@
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_tcp_link_notsent_bytes",
|
|
||||||
"priority": 4,
|
|
||||||
- "description": "Due to network delay or peer application performance, too many packets to be sent are accumulated in the sliding window. As a result, TCP performance deteriorates. (PID = {}, client IP = {}, Server IP = {}, Port = {})"
|
|
||||||
+ "description": "Due to network delay or peer application performance, too many packets to be sent are accumulated in the sliding window. As a result, TCP performance deteriorates. (PID = {}, client IP = {}, Server IP = {}, Port = {})",
|
|
||||||
+ "description-zh": "由于网络延迟或对端应用程序性能,滑动窗口中累积了太多要发送的数据包,导致TCP传输性能劣化(PID={},client IP={},Server IP={},Port={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_tcp_link_notack_bytes",
|
|
||||||
"priority": 4,
|
|
||||||
- "description": "Due to network delay or peer application performance, too many NO ACK packets are accumulated in the sliding window. As a result, TCP performance deteriorates. (PID = {}, client IP = {}, Server IP = {}, Port = {})"
|
|
||||||
+ "description": "Due to network delay or peer application performance, too many NO ACK packets are accumulated in the sliding window. As a result, TCP performance deteriorates. (PID = {}, client IP = {}, Server IP = {}, Port = {})",
|
|
||||||
+ "description-zh": "由于网络延迟或对端应用程序性能,滑动窗口中累积了过多的NO ACK数据包,导致TCP传输性能劣化(PID={},client IP={},Server IP={},Port={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_tcp_link_snd_wnd",
|
|
||||||
"priority": 4,
|
|
||||||
- "description": "The TCP send window is abnormal due to peer application performance or network congestion. As a result, the TCP performance deteriorates. (PID ={}, client IP = {}, Server IP = {}, Port = {})"
|
|
||||||
+ "description": "The TCP send window is abnormal due to peer application performance or network congestion. As a result, the TCP performance deteriorates. (PID ={}, client IP = {}, Server IP = {}, Port = {})",
|
|
||||||
+ "description-zh": "对端应用性能或网络拥塞导致TCP发送窗口异常,导致TCP传输性能劣化(PID={},client IP={},Server IP={},Port={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_tcp_link_rcv_wnd",
|
|
||||||
"priority": 4,
|
|
||||||
- "description": "The TCP receive window becomes abnormal due to the local application performance. As a result, the performance deteriorates. (PID ={}, client IP = {}, Server IP = {}, Port = {})"
|
|
||||||
+ "description": "The TCP receive window becomes abnormal due to the local application performance. As a result, the performance deteriorates. (PID ={}, client IP = {}, Server IP = {}, Port = {})",
|
|
||||||
+ "description-zh": "本地应用性能导致TCP接收窗口异常,传输性能变差(PID={},client IP={},Server IP={},Port={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_tcp_link_avl_snd_wnd",
|
|
||||||
"priority": 4,
|
|
||||||
- "description": "The available TCP send window may be abnormal due to network congestion and the performance deteriorates. (PID ={}, client IP = {}, Server IP = {}, Port = {})"
|
|
||||||
+ "description": "The available TCP send window may be abnormal due to network congestion and the performance deteriorates. (PID ={}, client IP = {}, Server IP = {}, Port = {})",
|
|
||||||
+ "description-zh": "可用的TCP发送窗口可能因网络拥塞而异常,传输性能劣化(PID={},client IP={},Server IP={},Port={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_tcp_link_lost_out",
|
|
||||||
"priority": 3,
|
|
||||||
- "description": "The network may be congested, causing abnormal TCP packet loss and performance deterioration. (PID ={}, client IP = {}, Server IP = {}, Port = {})"
|
|
||||||
+ "description": "The network may be congested, causing abnormal TCP packet loss and performance deterioration. (PID ={}, client IP = {}, Server IP = {}, Port = {})",
|
|
||||||
+ "description-zh": "网络可能拥塞,导致TCP异常丢包,传输性能劣化(PID={},client IP={},Server IP={},Port={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_tcp_link_sk_drops",
|
|
||||||
"priority": 3,
|
|
||||||
- "description": "Packets are lost in the host protocol stack due to unknown causes, and the TCP performance deteriorates. (PID ={}, client IP = {}, Server IP = {}, Port = {})"
|
|
||||||
+ "description": "Packets are lost in the host protocol stack due to unknown causes, and the TCP performance deteriorates. (PID ={}, client IP = {}, Server IP = {}, Port = {})",
|
|
||||||
+ "description-zh": "主机协议栈不明原因丢包,导致TCP传输性能劣化(PID={},client IP={},Server IP={},Port={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_tcp_link_retran_packets",
|
|
||||||
"priority": 1,
|
|
||||||
- "description": "TCP retransmission is triggered due to network faults, resulting in TCP performance deterioration. (PID ={}, client IP = {}, Server IP = {}, Port = {})"
|
|
||||||
+ "description": "TCP retransmission is triggered due to network faults, resulting in TCP performance deterioration. (PID ={}, client IP = {}, Server IP = {}, Port = {})",
|
|
||||||
+ "description-zh": "网络故障触发TCP重传,导致TCP传输性能劣化(PID={},client IP={},Server IP={},Port={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_tcp_link_backlog_drops",
|
|
||||||
"priority": 0,
|
|
||||||
- "description": "TCP backlog overflows due to local application performance. As a result, TCP performance deteriorates. (PID ={}, client IP = {}, Server IP = {}, Port = {})"
|
|
||||||
+ "description": "TCP backlog overflows due to local application performance. As a result, TCP performance deteriorates. (PID ={}, client IP = {}, Server IP = {}, Port = {})",
|
|
||||||
+ "description-zh": "由于本地应用程序性能问题,TCP积压溢出,导致TCP传输性能劣化(PID={},client IP={},Server IP={},Port={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_tcp_link_sacked_out",
|
|
||||||
"priority": 2,
|
|
||||||
- "description": "TCP performance deteriorates due to network out-of-order. (PID ={}, client IP = {}, Server IP = {}, Port = {})"
|
|
||||||
+ "description": "TCP performance deteriorates due to network out-of-order. (PID ={}, client IP = {}, Server IP = {}, Port = {})",
|
|
||||||
+ "description-zh": "网络乱序导致TCP传输性能劣化(PID={},client IP={},Server IP={},Port={})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"metric": "gala_gopher_tcp_link_sk_backlog_size",
|
|
||||||
"priority": 0,
|
|
||||||
- "description": "The TCP backlog queue length is abnormal due to the local application performance. As a result, the TCP performance deteriorates. (PID ={}, client IP = {}, Server IP = {}, Port = {})"
|
|
||||||
+ "description": "The TCP backlog queue length is abnormal due to the local application performance. As a result, the TCP performance deteriorates. (PID ={}, client IP = {}, Server IP = {}, Port = {})",
|
|
||||||
+ "description-zh": "本地应用性能导致TCP backlog队列长度异常,TCP传输性能劣化(PID={},client IP={},Server IP={},Port={})"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
\ No newline at end of file
|
|
||||||
--
|
|
||||||
2.33.0
|
|
||||||
|
|
||||||
@ -1,81 +0,0 @@
|
|||||||
From 2ef581e4960dd0ba49bbe371496933841da001fe Mon Sep 17 00:00:00 2001
|
|
||||||
From: lizhenxing11 <lizhenxing11@huawei.com>
|
|
||||||
Date: Mon, 9 Jan 2023 15:08:01 +0800
|
|
||||||
Subject: [PATCH] add systemd service for anteater
|
|
||||||
|
|
||||||
add manifest.in
|
|
||||||
---
|
|
||||||
MANIFEST.in | 11 +++++++++++
|
|
||||||
service/gala-anteater.service | 12 ++++++++++++
|
|
||||||
setup.py | 5 +++--
|
|
||||||
3 files changed, 26 insertions(+), 2 deletions(-)
|
|
||||||
create mode 100644 MANIFEST.in
|
|
||||||
create mode 100644 service/gala-anteater.service
|
|
||||||
|
|
||||||
diff --git a/MANIFEST.in b/MANIFEST.in
|
|
||||||
new file mode 100644
|
|
||||||
index 0000000..7120af9
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/MANIFEST.in
|
|
||||||
@@ -0,0 +1,11 @@
|
|
||||||
+include LICENSE
|
|
||||||
+include README.en.md
|
|
||||||
+include README.md
|
|
||||||
+include requirements.txt
|
|
||||||
+
|
|
||||||
+recursive-include service *
|
|
||||||
+recursive-include tests *
|
|
||||||
+recursive-include docs *
|
|
||||||
+
|
|
||||||
+recursive-exclude * __pycache__
|
|
||||||
+recursive-exclude * *.py[co]
|
|
||||||
\ No newline at end of file
|
|
||||||
diff --git a/service/gala-anteater.service b/service/gala-anteater.service
|
|
||||||
new file mode 100644
|
|
||||||
index 0000000..24af354
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/service/gala-anteater.service
|
|
||||||
@@ -0,0 +1,12 @@
|
|
||||||
+[Unit]
|
|
||||||
+Description=A-Ops gala-anteater service
|
|
||||||
+After=network.target
|
|
||||||
+
|
|
||||||
+[Service]
|
|
||||||
+Type=exec
|
|
||||||
+ExecStart=/usr/bin/gala-anteater
|
|
||||||
+Restart=on-failure
|
|
||||||
+RestartSec=1
|
|
||||||
+
|
|
||||||
+[Install]
|
|
||||||
+WantedBy=multi-user.target
|
|
||||||
\ No newline at end of file
|
|
||||||
diff --git a/setup.py b/setup.py
|
|
||||||
index 4471a0f..e075391 100644
|
|
||||||
--- a/setup.py
|
|
||||||
+++ b/setup.py
|
|
||||||
@@ -23,11 +23,12 @@ setup(
|
|
||||||
description="Times Series Anomaly Detection Platform on Operating System",
|
|
||||||
url="https://gitee.com/openeuler/A-Ops/tree/master/gala-anteater",
|
|
||||||
keywords=["Anomaly Detection", "Time Series Analysis", "Operating System"],
|
|
||||||
- packages=find_packages(where="."),
|
|
||||||
+ packages=find_packages(where=".", exclude=("tests",)),
|
|
||||||
data_files=[
|
|
||||||
('/etc/gala-anteater/config/', glob('config/gala-anteater.yaml')),
|
|
||||||
('/etc/gala-anteater/config/', glob('config/log.settings.ini')),
|
|
||||||
('/etc/gala-anteater/config/module/', glob('config/module/*')),
|
|
||||||
+ ('/usr/lib/systemd/system/', glob('service/*')),
|
|
||||||
],
|
|
||||||
install_requires=[
|
|
||||||
"APScheduler",
|
|
||||||
@@ -42,7 +43,7 @@ setup(
|
|
||||||
],
|
|
||||||
entry_points={
|
|
||||||
"console_scripts": [
|
|
||||||
- "gala-anteater = anteater.main:main",
|
|
||||||
+ "gala-anteater=anteater.main:main",
|
|
||||||
]
|
|
||||||
}
|
|
||||||
)
|
|
||||||
--
|
|
||||||
2.33.0
|
|
||||||
|
|
||||||
@ -1,737 +0,0 @@
|
|||||||
From 27bb7cdd80f76bfc7ebb0f3041544740aa2fa91b Mon Sep 17 00:00:00 2001
|
|
||||||
From: lizhenxing11 <lizhenxing11@huawei.com>
|
|
||||||
Date: Tue, 10 Jan 2023 15:31:44 +0800
|
|
||||||
Subject: [PATCH] fix str2enum bug & data query refactor
|
|
||||||
|
|
||||||
---
|
|
||||||
anteater/core/anomaly.py | 10 ++++
|
|
||||||
anteater/core/kpi.py | 14 ++++++
|
|
||||||
anteater/model/algorithms/slope.py | 11 +++--
|
|
||||||
anteater/model/detector/base.py | 20 ++++----
|
|
||||||
anteater/model/detector/n_sigma_detector.py | 15 +++---
|
|
||||||
.../model/detector/online_vae_detector.py | 3 +-
|
|
||||||
.../tcp_establish_n_sigma_detector.py | 3 +-
|
|
||||||
.../tcp_trans_latency_n_sigma_detector.py | 48 +++++++++++++++++--
|
|
||||||
anteater/model/detector/th_base_detector.py | 3 +-
|
|
||||||
anteater/module/app/app_sli_detector.py | 4 +-
|
|
||||||
anteater/module/sys/disk_throughput.py | 4 +-
|
|
||||||
anteater/module/sys/proc_io_latency.py | 4 +-
|
|
||||||
anteater/module/sys/sys_io_latency.py | 4 +-
|
|
||||||
.../module/sys/tcp_transmission_latency.py | 4 +-
|
|
||||||
.../module/sys/tcp_transmission_throughput.py | 4 +-
|
|
||||||
anteater/source/metric_loader.py | 41 +++++++++++++++-
|
|
||||||
anteater/utils/data_load.py | 4 +-
|
|
||||||
config/module/app_sli_rtt.json | 6 ++-
|
|
||||||
config/module/disk_throughput.json | 6 ++-
|
|
||||||
config/module/proc_io_latency.json | 15 ++++--
|
|
||||||
config/module/sys_io_latency.json | 2 +-
|
|
||||||
config/module/sys_tcp_establish.json | 2 +-
|
|
||||||
.../module/sys_tcp_transmission_latency.json | 4 +-
|
|
||||||
23 files changed, 172 insertions(+), 59 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/anteater/core/anomaly.py b/anteater/core/anomaly.py
|
|
||||||
index 45c4fc3..fdee3d1 100644
|
|
||||||
--- a/anteater/core/anomaly.py
|
|
||||||
+++ b/anteater/core/anomaly.py
|
|
||||||
@@ -52,3 +52,13 @@ class AnomalyTrend(Enum):
|
|
||||||
DEFAULT = 0
|
|
||||||
RISE = 1
|
|
||||||
FALL = 2
|
|
||||||
+
|
|
||||||
+ @staticmethod
|
|
||||||
+ def from_str(label: str):
|
|
||||||
+ """Trans str to Enum type"""
|
|
||||||
+ if label.upper() == 'RISE':
|
|
||||||
+ return AnomalyTrend.RISE
|
|
||||||
+ elif label.upper() == 'FALL':
|
|
||||||
+ return AnomalyTrend.FALL
|
|
||||||
+ else:
|
|
||||||
+ return AnomalyTrend.DEFAULT
|
|
||||||
diff --git a/anteater/core/kpi.py b/anteater/core/kpi.py
|
|
||||||
index f83b666..70cc9ee 100644
|
|
||||||
--- a/anteater/core/kpi.py
|
|
||||||
+++ b/anteater/core/kpi.py
|
|
||||||
@@ -27,6 +27,13 @@ class KPI:
|
|
||||||
params: dict = field(default=dict)
|
|
||||||
atrend: AnomalyTrend = AnomalyTrend.DEFAULT
|
|
||||||
|
|
||||||
+ @classmethod
|
|
||||||
+ def from_dict(cls, **data):
|
|
||||||
+ if 'atrend' in data:
|
|
||||||
+ data['atrend'] = AnomalyTrend.from_str(data.get('atrend'))
|
|
||||||
+
|
|
||||||
+ return cls(**data)
|
|
||||||
+
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Feature:
|
|
||||||
@@ -35,6 +42,13 @@ class Feature:
|
|
||||||
priority: int = 0
|
|
||||||
atrend: AnomalyTrend = AnomalyTrend.DEFAULT
|
|
||||||
|
|
||||||
+ @classmethod
|
|
||||||
+ def from_dict(cls, **data):
|
|
||||||
+ if 'atrend' in data:
|
|
||||||
+ data['atrend'] = AnomalyTrend.from_str(data.get('atrend'))
|
|
||||||
+
|
|
||||||
+ return cls(**data)
|
|
||||||
+
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class ModelConfig:
|
|
||||||
diff --git a/anteater/model/algorithms/slope.py b/anteater/model/algorithms/slope.py
|
|
||||||
index d324d58..e546183 100644
|
|
||||||
--- a/anteater/model/algorithms/slope.py
|
|
||||||
+++ b/anteater/model/algorithms/slope.py
|
|
||||||
@@ -17,6 +17,7 @@ import numpy as np
|
|
||||||
|
|
||||||
from anteater.core.anomaly import AnomalyTrend
|
|
||||||
from anteater.model.algorithms.smooth import conv_smooth
|
|
||||||
+from anteater.utils.common import divide
|
|
||||||
|
|
||||||
|
|
||||||
def slope(y, win_len):
|
|
||||||
@@ -36,13 +37,15 @@ def smooth_slope(time_series, windows_length):
|
|
||||||
|
|
||||||
def trend(y, win_len=None):
|
|
||||||
"""Gets the trend for the y"""
|
|
||||||
+ y = conv_smooth(y, box_pts=7)
|
|
||||||
+
|
|
||||||
if not win_len:
|
|
||||||
win_len = len(y) // 2
|
|
||||||
|
|
||||||
- if np.mean(y[:win_len]) < np.mean(y[-win_len:]):
|
|
||||||
+ if divide(np.mean(y[:win_len]), np.mean(y[-win_len:])) < 0.9:
|
|
||||||
return 1
|
|
||||||
|
|
||||||
- elif np.mean(y[:win_len]) > np.mean(y[-win_len:]):
|
|
||||||
+ elif divide(np.mean(y[:win_len]), np.mean(y[-win_len:])) > 1.1:
|
|
||||||
return -1
|
|
||||||
|
|
||||||
else:
|
|
||||||
@@ -51,10 +54,10 @@ def trend(y, win_len=None):
|
|
||||||
|
|
||||||
def check_trend(values: List[float], atrend: AnomalyTrend):
|
|
||||||
"""Checks the values with an 'atrend' trend"""
|
|
||||||
- if atrend == AnomalyTrend.RISE and trend(values) < 0:
|
|
||||||
+ if atrend == AnomalyTrend.RISE and trend(values) != 1:
|
|
||||||
return False
|
|
||||||
|
|
||||||
- if atrend == AnomalyTrend.FALL and trend(values) > 0:
|
|
||||||
+ if atrend == AnomalyTrend.FALL and trend(values) != -1:
|
|
||||||
return False
|
|
||||||
|
|
||||||
return True
|
|
||||||
diff --git a/anteater/model/detector/base.py b/anteater/model/detector/base.py
|
|
||||||
index 2b2dafe..a23b6d9 100644
|
|
||||||
--- a/anteater/model/detector/base.py
|
|
||||||
+++ b/anteater/model/detector/base.py
|
|
||||||
@@ -11,6 +11,7 @@
|
|
||||||
# See the Mulan PSL v2 for more details.
|
|
||||||
# ******************************************************************************/
|
|
||||||
|
|
||||||
+import logging
|
|
||||||
import math
|
|
||||||
from abc import abstractmethod
|
|
||||||
from typing import List
|
|
||||||
@@ -39,12 +40,6 @@ class Detector:
|
|
||||||
"""Executes anomaly detection on kpis"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
- def get_unique_machine_id(self, start, end, kpis: List[KPI]) -> List[str]:
|
|
||||||
- """Gets unique machine ids during past minutes"""
|
|
||||||
- metrics = [_kpi.metric for _kpi in kpis]
|
|
||||||
- machine_ids = self.data_loader.get_unique_machines(start, end, metrics)
|
|
||||||
- return machine_ids
|
|
||||||
-
|
|
||||||
def execute(self, job_config: JobConfig) -> List[Anomaly]:
|
|
||||||
"""The main function of the detector"""
|
|
||||||
kpis = job_config.kpis
|
|
||||||
@@ -56,6 +51,12 @@ class Detector:
|
|
||||||
|
|
||||||
return self._execute(kpis, features, top_n=n)
|
|
||||||
|
|
||||||
+ def get_unique_machine_id(self, start, end, kpis: List[KPI]) -> List[str]:
|
|
||||||
+ """Gets unique machine ids during past minutes"""
|
|
||||||
+ metrics = [_kpi.metric for _kpi in kpis]
|
|
||||||
+ machine_ids = self.data_loader.get_unique_machines(start, end, metrics)
|
|
||||||
+ return machine_ids
|
|
||||||
+
|
|
||||||
def find_root_causes(self, anomalies: List[Anomaly], features: List[Feature], top_n=3)\
|
|
||||||
-> List[Anomaly]:
|
|
||||||
"""Finds root causes for each anomaly events"""
|
|
||||||
@@ -82,6 +83,7 @@ class Detector:
|
|
||||||
tmp_ts_scores = self.cal_anomaly_score(f.metric, f.description, machine_id=machine_id)
|
|
||||||
for _ts_score in tmp_ts_scores:
|
|
||||||
if not check_trend(_ts_score.ts.values, f.atrend):
|
|
||||||
+ logging.info(f"Trends Filtered: {f.metric}")
|
|
||||||
_ts_score.score = 0
|
|
||||||
if same_intersection_key_value(_ts_score.ts.labels, filters):
|
|
||||||
ts_scores.append(_ts_score)
|
|
||||||
@@ -101,6 +103,7 @@ class Detector:
|
|
||||||
for _ts_s in ts_scores:
|
|
||||||
if same_intersection_key_value(_ts_s.ts.labels, anomaly.labels):
|
|
||||||
if not check_trend(_ts_s.ts.values, kpi_atrends[anomaly.metric]):
|
|
||||||
+ logging.info(f"Trends Filtered: {anomaly.metric}")
|
|
||||||
anomaly.score = 0
|
|
||||||
else:
|
|
||||||
anomaly.score = _ts_s.score
|
|
||||||
@@ -115,12 +118,11 @@ class Detector:
|
|
||||||
machine_id: str)\
|
|
||||||
-> List[TimeSeriesScore]:
|
|
||||||
"""Calculates metric anomaly scores based on sr model"""
|
|
||||||
- start, end = dt.last(minutes=6)
|
|
||||||
+ start, end = dt.last(minutes=10)
|
|
||||||
point_count = self.data_loader.expected_point_length(start, end)
|
|
||||||
model = SpectralResidual(12, 24, 50)
|
|
||||||
ts_scores = []
|
|
||||||
- ts_list = self.data_loader.\
|
|
||||||
- get_metric(start, end, metric, label_name='machine_id', label_value=machine_id)
|
|
||||||
+ ts_list = self.data_loader.get_metric(start, end, metric, machine_id=machine_id)
|
|
||||||
for _ts in ts_list:
|
|
||||||
if sum(_ts.values) == 0 or \
|
|
||||||
len(_ts.values) < point_count * 0.9 or\
|
|
||||||
diff --git a/anteater/model/detector/n_sigma_detector.py b/anteater/model/detector/n_sigma_detector.py
|
|
||||||
index 3a2ab01..dbf83c6 100644
|
|
||||||
--- a/anteater/model/detector/n_sigma_detector.py
|
|
||||||
+++ b/anteater/model/detector/n_sigma_detector.py
|
|
||||||
@@ -29,10 +29,9 @@ from anteater.utils.log import logger
|
|
||||||
class NSigmaDetector(Detector):
|
|
||||||
"""The three sigma anomaly detector"""
|
|
||||||
|
|
||||||
- def __init__(self, data_loader: MetricLoader, method: str):
|
|
||||||
+ def __init__(self, data_loader: MetricLoader):
|
|
||||||
"""The detector base class initializer"""
|
|
||||||
super().__init__(data_loader)
|
|
||||||
- self.method = method
|
|
||||||
|
|
||||||
def detect_kpis(self, kpis: List[KPI]):
|
|
||||||
"""Executes anomaly detection on kpis"""
|
|
||||||
@@ -48,7 +47,7 @@ class NSigmaDetector(Detector):
|
|
||||||
def detect_signal_kpi(self, kpi, machine_id: str) -> List[Anomaly]:
|
|
||||||
"""Detects kpi based on signal time series anomaly detection model"""
|
|
||||||
outlier_ratio_th = kpi.params['outlier_ratio_th']
|
|
||||||
- ts_scores = self.calculate_metric_three_sigma_score(
|
|
||||||
+ ts_scores = self.calculate_n_sigma_score(
|
|
||||||
kpi.metric, kpi.description, machine_id, **kpi.params)
|
|
||||||
if not ts_scores:
|
|
||||||
logger.warning(f'Key metric {kpi.metric} is null on the target machine {machine_id}!')
|
|
||||||
@@ -68,17 +67,17 @@ class NSigmaDetector(Detector):
|
|
||||||
|
|
||||||
return anomalies
|
|
||||||
|
|
||||||
- def calculate_metric_three_sigma_score(self, metric, description, machine_id: str, **kwargs)\
|
|
||||||
+ def calculate_n_sigma_score(self, metric, description, machine_id: str, **kwargs)\
|
|
||||||
-> List[TimeSeriesScore]:
|
|
||||||
"""Calculate kpi anomaly scores based on three sigma scores"""
|
|
||||||
+ method = kwargs.get('method', 'abs')
|
|
||||||
look_back = kwargs.get('look_back')
|
|
||||||
smooth_params = kwargs.get('smooth_params')
|
|
||||||
obs_size = kwargs.get('obs_size')
|
|
||||||
n = kwargs.get('n', 3)
|
|
||||||
start, end = dt.last(minutes=look_back)
|
|
||||||
point_count = self.data_loader.expected_point_length(start, end)
|
|
||||||
- ts_list = self.data_loader.\
|
|
||||||
- get_metric(start, end, metric, label_name='machine_id', label_value=machine_id)
|
|
||||||
+ ts_list = self.data_loader.get_metric(start, end, metric, machine_id=machine_id)
|
|
||||||
ts_scores = []
|
|
||||||
for _ts in ts_list:
|
|
||||||
dedup_values = [k for k, g in groupby(_ts.values)]
|
|
||||||
@@ -87,12 +86,12 @@ class NSigmaDetector(Detector):
|
|
||||||
len(_ts.values) > point_count * 1.5 or \
|
|
||||||
all(x == _ts.values[0] for x in _ts.values):
|
|
||||||
ratio = 0
|
|
||||||
- elif len(dedup_values) < point_count * 0.3:
|
|
||||||
+ elif len(dedup_values) < point_count * 0.6:
|
|
||||||
ratio = 0
|
|
||||||
else:
|
|
||||||
smoothed_val = smoothing(_ts.values, **smooth_params)
|
|
||||||
outlier, mean, std = n_sigma(
|
|
||||||
- smoothed_val, obs_size=obs_size, n=n, method=self.method)
|
|
||||||
+ smoothed_val, obs_size=obs_size, n=n, method=method)
|
|
||||||
ratio = divide(len(outlier), obs_size)
|
|
||||||
|
|
||||||
ts_scores.append(TimeSeriesScore(ts=_ts, score=ratio, description=description))
|
|
||||||
diff --git a/anteater/model/detector/online_vae_detector.py b/anteater/model/detector/online_vae_detector.py
|
|
||||||
index 63a7b09..0f91576 100644
|
|
||||||
--- a/anteater/model/detector/online_vae_detector.py
|
|
||||||
+++ b/anteater/model/detector/online_vae_detector.py
|
|
||||||
@@ -110,8 +110,7 @@ class OnlineVAEDetector(Detector):
|
|
||||||
metric_dfs = []
|
|
||||||
for metric in metrics:
|
|
||||||
_ts_list = self.data_loader.\
|
|
||||||
- get_metric(start, end, metric, label_name="machine_id",
|
|
||||||
- label_value=machine_id, operator_name='avg')
|
|
||||||
+ get_metric(start, end, metric, operator='avg', keys="machine_id", machine_id=machine_id)
|
|
||||||
|
|
||||||
if len(_ts_list) > 1:
|
|
||||||
raise ValueError(f'Got multiple time_series based on machine id: {len(_ts_list)}')
|
|
||||||
diff --git a/anteater/model/detector/tcp_establish_n_sigma_detector.py b/anteater/model/detector/tcp_establish_n_sigma_detector.py
|
|
||||||
index 82d7837..3720069 100644
|
|
||||||
--- a/anteater/model/detector/tcp_establish_n_sigma_detector.py
|
|
||||||
+++ b/anteater/model/detector/tcp_establish_n_sigma_detector.py
|
|
||||||
@@ -73,8 +73,7 @@ class TcpEstablishNSigmaDetector(Detector):
|
|
||||||
min_rtt = kpi.params.get('min_rtt')
|
|
||||||
|
|
||||||
start, end = dt.last(minutes=look_back)
|
|
||||||
- ts_list = self.data_loader.\
|
|
||||||
- get_metric(start, end, kpi.metric, label_name='machine_id', label_value=machine_id)
|
|
||||||
+ ts_list = self.data_loader.get_metric(start, end, kpi.metric, machine_id=machine_id)
|
|
||||||
|
|
||||||
anomalies = []
|
|
||||||
for _ts in ts_list:
|
|
||||||
diff --git a/anteater/model/detector/tcp_trans_latency_n_sigma_detector.py b/anteater/model/detector/tcp_trans_latency_n_sigma_detector.py
|
|
||||||
index 1eeb95f..6d41775 100644
|
|
||||||
--- a/anteater/model/detector/tcp_trans_latency_n_sigma_detector.py
|
|
||||||
+++ b/anteater/model/detector/tcp_trans_latency_n_sigma_detector.py
|
|
||||||
@@ -11,20 +11,61 @@
|
|
||||||
# See the Mulan PSL v2 for more details.
|
|
||||||
# ******************************************************************************/
|
|
||||||
|
|
||||||
+from itertools import groupby
|
|
||||||
from typing import List
|
|
||||||
|
|
||||||
+import numpy as np
|
|
||||||
+
|
|
||||||
from anteater.core.time_series import TimeSeriesScore
|
|
||||||
+from anteater.model.algorithms.smooth import smoothing
|
|
||||||
+from anteater.model.algorithms.three_sigma import n_sigma
|
|
||||||
from anteater.model.detector.n_sigma_detector import NSigmaDetector
|
|
||||||
from anteater.source.metric_loader import MetricLoader
|
|
||||||
+from anteater.utils.common import divide
|
|
||||||
from anteater.utils.datetime import DateTimeManager as dt
|
|
||||||
|
|
||||||
|
|
||||||
class TcpTransLatencyNSigmaDetector(NSigmaDetector):
|
|
||||||
"""The three sigma anomaly detector"""
|
|
||||||
|
|
||||||
- def __init__(self, data_loader: MetricLoader, method: str):
|
|
||||||
+ def __init__(self, data_loader: MetricLoader):
|
|
||||||
"""The detector base class initializer"""
|
|
||||||
- super().__init__(data_loader, method)
|
|
||||||
+ super().__init__(data_loader)
|
|
||||||
+
|
|
||||||
+ def calculate_n_sigma_score(self, metric, description, machine_id: str, **kwargs)\
|
|
||||||
+ -> List[TimeSeriesScore]:
|
|
||||||
+ """Calculates anomaly scores based on n sigma scores"""
|
|
||||||
+ method = kwargs.get('method', 'abs')
|
|
||||||
+ look_back = kwargs.get('look_back')
|
|
||||||
+ smooth_params = kwargs.get('smooth_params')
|
|
||||||
+ obs_size = kwargs.get('obs_size')
|
|
||||||
+ min_srtt = kwargs.get("min_srtt")
|
|
||||||
+ n = kwargs.get('n', 3)
|
|
||||||
+ start, end = dt.last(minutes=look_back)
|
|
||||||
+ point_count = self.data_loader.expected_point_length(start, end)
|
|
||||||
+ ts_list = self.data_loader.get_metric(start, end, metric, machine_id=machine_id)
|
|
||||||
+ ts_scores = []
|
|
||||||
+ for _ts in ts_list:
|
|
||||||
+ dedup_values = [k for k, g in groupby(_ts.values)]
|
|
||||||
+ if sum(_ts.values) == 0 or \
|
|
||||||
+ len(_ts.values) < point_count * 0.6 or \
|
|
||||||
+ len(_ts.values) > point_count * 1.5 or \
|
|
||||||
+ all(x == _ts.values[0] for x in _ts.values):
|
|
||||||
+ ratio = 0
|
|
||||||
+ elif len(dedup_values) < point_count * 0.6:
|
|
||||||
+ ratio = 0
|
|
||||||
+ else:
|
|
||||||
+ smoothed_val = smoothing(_ts.values, **smooth_params)
|
|
||||||
+ outlier, mean, std = n_sigma(
|
|
||||||
+ smoothed_val, obs_size=obs_size, n=n, method=method)
|
|
||||||
+ if outlier and np.average(outlier) <= min_srtt:
|
|
||||||
+ ratio = 0
|
|
||||||
+ else:
|
|
||||||
+ ratio = divide(len(outlier), obs_size)
|
|
||||||
+
|
|
||||||
+ ts_scores.append(TimeSeriesScore(ts=_ts, score=ratio, description=description))
|
|
||||||
+
|
|
||||||
+ return ts_scores
|
|
||||||
|
|
||||||
def cal_anomaly_score(self, metric, description, machine_id: str) \
|
|
||||||
-> List[TimeSeriesScore]:
|
|
||||||
@@ -32,8 +73,7 @@ class TcpTransLatencyNSigmaDetector(NSigmaDetector):
|
|
||||||
start, end = dt.last(minutes=2)
|
|
||||||
point_count = self.data_loader.expected_point_length(start, end)
|
|
||||||
ts_scores = []
|
|
||||||
- ts_list = self.data_loader. \
|
|
||||||
- get_metric(start, end, metric, label_name='machine_id', label_value=machine_id)
|
|
||||||
+ ts_list = self.data_loader.get_metric(start, end, metric, machine_id=machine_id)
|
|
||||||
for _ts in ts_list:
|
|
||||||
if sum(_ts.values) == 0 or \
|
|
||||||
len(_ts.values) < point_count * 0.5 or \
|
|
||||||
diff --git a/anteater/model/detector/th_base_detector.py b/anteater/model/detector/th_base_detector.py
|
|
||||||
index bec9705..0af4f22 100644
|
|
||||||
--- a/anteater/model/detector/th_base_detector.py
|
|
||||||
+++ b/anteater/model/detector/th_base_detector.py
|
|
||||||
@@ -44,8 +44,7 @@ class ThBaseDetector(Detector):
|
|
||||||
look_back = kpi.params.get('look_back')
|
|
||||||
th = kpi.params.get('th')
|
|
||||||
start, end = dt.last(minutes=look_back)
|
|
||||||
- ts_list = self.data_loader.\
|
|
||||||
- get_metric(start, end, kpi.metric, label_name='machine_id', label_value=machine_id)
|
|
||||||
+ ts_list = self.data_loader.get_metric(start, end, kpi.metric, machine_id=machine_id)
|
|
||||||
|
|
||||||
if not ts_list:
|
|
||||||
logger.warning(f'Key metric {kpi.metric} is null on the target machine {machine_id}!')
|
|
||||||
diff --git a/anteater/module/app/app_sli_detector.py b/anteater/module/app/app_sli_detector.py
|
|
||||||
index 102ed11..e506332 100644
|
|
||||||
--- a/anteater/module/app/app_sli_detector.py
|
|
||||||
+++ b/anteater/module/app/app_sli_detector.py
|
|
||||||
@@ -44,12 +44,12 @@ class APPSliDetector(E2EDetector):
|
|
||||||
def init_detectors(self, data_loader):
|
|
||||||
if self.job_config.model_config.enable:
|
|
||||||
detectors = [
|
|
||||||
- NSigmaDetector(data_loader, method='min'),
|
|
||||||
+ NSigmaDetector(data_loader),
|
|
||||||
OnlineVAEDetector(data_loader, self.job_config.model_config)
|
|
||||||
]
|
|
||||||
else:
|
|
||||||
detectors = [
|
|
||||||
- NSigmaDetector(data_loader, method='min')
|
|
||||||
+ NSigmaDetector(data_loader)
|
|
||||||
]
|
|
||||||
|
|
||||||
return detectors
|
|
||||||
diff --git a/anteater/module/sys/disk_throughput.py b/anteater/module/sys/disk_throughput.py
|
|
||||||
index 9a192fb..7971505 100644
|
|
||||||
--- a/anteater/module/sys/disk_throughput.py
|
|
||||||
+++ b/anteater/module/sys/disk_throughput.py
|
|
||||||
@@ -38,12 +38,12 @@ class DiskThroughputDetector(E2EDetector):
|
|
||||||
def init_detectors(self, data_loader):
|
|
||||||
if self.job_config.model_config.enable:
|
|
||||||
detectors = [
|
|
||||||
- NSigmaDetector(data_loader, method='max'),
|
|
||||||
+ NSigmaDetector(data_loader),
|
|
||||||
OnlineVAEDetector(data_loader, self.job_config.model_config)
|
|
||||||
]
|
|
||||||
else:
|
|
||||||
detectors = [
|
|
||||||
- NSigmaDetector(data_loader, method='max')
|
|
||||||
+ NSigmaDetector(data_loader)
|
|
||||||
]
|
|
||||||
|
|
||||||
return detectors
|
|
||||||
diff --git a/anteater/module/sys/proc_io_latency.py b/anteater/module/sys/proc_io_latency.py
|
|
||||||
index a34c48d..b76acea 100644
|
|
||||||
--- a/anteater/module/sys/proc_io_latency.py
|
|
||||||
+++ b/anteater/module/sys/proc_io_latency.py
|
|
||||||
@@ -38,12 +38,12 @@ class ProcIOLatencyDetector(E2EDetector):
|
|
||||||
def init_detectors(self, data_loader):
|
|
||||||
if self.job_config.model_config.enable:
|
|
||||||
detectors = [
|
|
||||||
- NSigmaDetector(data_loader, method='max'),
|
|
||||||
+ NSigmaDetector(data_loader),
|
|
||||||
OnlineVAEDetector(data_loader, self.job_config.model_config)
|
|
||||||
]
|
|
||||||
else:
|
|
||||||
detectors = [
|
|
||||||
- NSigmaDetector(data_loader, method='max')
|
|
||||||
+ NSigmaDetector(data_loader)
|
|
||||||
]
|
|
||||||
|
|
||||||
return detectors
|
|
||||||
diff --git a/anteater/module/sys/sys_io_latency.py b/anteater/module/sys/sys_io_latency.py
|
|
||||||
index a6f01c2..17a34c9 100644
|
|
||||||
--- a/anteater/module/sys/sys_io_latency.py
|
|
||||||
+++ b/anteater/module/sys/sys_io_latency.py
|
|
||||||
@@ -38,12 +38,12 @@ class SysIOLatencyDetector(E2EDetector):
|
|
||||||
def init_detectors(self, data_loader):
|
|
||||||
if self.job_config.model_config.enable:
|
|
||||||
detectors = [
|
|
||||||
- NSigmaDetector(data_loader, method='abs'),
|
|
||||||
+ NSigmaDetector(data_loader),
|
|
||||||
OnlineVAEDetector(data_loader, self.job_config.model_config)
|
|
||||||
]
|
|
||||||
else:
|
|
||||||
detectors = [
|
|
||||||
- NSigmaDetector(data_loader, method='abs')
|
|
||||||
+ NSigmaDetector(data_loader)
|
|
||||||
]
|
|
||||||
|
|
||||||
return detectors
|
|
||||||
diff --git a/anteater/module/sys/tcp_transmission_latency.py b/anteater/module/sys/tcp_transmission_latency.py
|
|
||||||
index cf0f406..e085ec3 100644
|
|
||||||
--- a/anteater/module/sys/tcp_transmission_latency.py
|
|
||||||
+++ b/anteater/module/sys/tcp_transmission_latency.py
|
|
||||||
@@ -39,12 +39,12 @@ class SysTcpTransmissionLatencyDetector(E2EDetector):
|
|
||||||
def init_detectors(self, data_loader):
|
|
||||||
if self.job_config.model_config.enable:
|
|
||||||
detectors = [
|
|
||||||
- TcpTransLatencyNSigmaDetector(data_loader, method='max'),
|
|
||||||
+ TcpTransLatencyNSigmaDetector(data_loader),
|
|
||||||
OnlineVAEDetector(data_loader, self.job_config.model_config)
|
|
||||||
]
|
|
||||||
else:
|
|
||||||
detectors = [
|
|
||||||
- TcpTransLatencyNSigmaDetector(data_loader, method='max')
|
|
||||||
+ TcpTransLatencyNSigmaDetector(data_loader)
|
|
||||||
]
|
|
||||||
|
|
||||||
return detectors
|
|
||||||
diff --git a/anteater/module/sys/tcp_transmission_throughput.py b/anteater/module/sys/tcp_transmission_throughput.py
|
|
||||||
index 86ecc9e..2921602 100644
|
|
||||||
--- a/anteater/module/sys/tcp_transmission_throughput.py
|
|
||||||
+++ b/anteater/module/sys/tcp_transmission_throughput.py
|
|
||||||
@@ -38,12 +38,12 @@ class SysTcpTransmissionThroughputDetector(E2EDetector):
|
|
||||||
def init_detectors(self, data_loader):
|
|
||||||
if self.job_config.model_config.enable:
|
|
||||||
detectors = [
|
|
||||||
- NSigmaDetector(data_loader, method='abs'),
|
|
||||||
+ NSigmaDetector(data_loader),
|
|
||||||
OnlineVAEDetector(data_loader, self.job_config.model_config)
|
|
||||||
]
|
|
||||||
else:
|
|
||||||
detectors = [
|
|
||||||
- NSigmaDetector(data_loader, method='abs')
|
|
||||||
+ NSigmaDetector(data_loader)
|
|
||||||
]
|
|
||||||
|
|
||||||
return detectors
|
|
||||||
diff --git a/anteater/source/metric_loader.py b/anteater/source/metric_loader.py
|
|
||||||
index ef2d012..4745d87 100644
|
|
||||||
--- a/anteater/source/metric_loader.py
|
|
||||||
+++ b/anteater/source/metric_loader.py
|
|
||||||
@@ -65,6 +65,43 @@ def get_query(metric: str,
|
|
||||||
return query
|
|
||||||
|
|
||||||
|
|
||||||
+def get_query2(
|
|
||||||
+ metric: str, operator: str = None, value: float = None, keys: Union[str, List] = None, **labels):
|
|
||||||
+ """Gets aggregated query patterns
|
|
||||||
+
|
|
||||||
+ Format: [operator]([value,] metric{[**labels]}) by (keys)
|
|
||||||
+
|
|
||||||
+ Such as:
|
|
||||||
+ - 1. gala_gopher_bind_sends{machine_id="1234"}
|
|
||||||
+ - 2. sum(gala_gopher_bind_sends) by (machine_id)
|
|
||||||
+ - 2. sum(gala_gopher_bind_sends) by (machine_id)
|
|
||||||
+ - 3. sum(gala_gopher_bind_sends{machine_id="1234"}) by (machine_id)
|
|
||||||
+ - 4. quantile(0.7, gala_gopher_bind_sends{machine_id="1234"}) by (machine_id)
|
|
||||||
+ """
|
|
||||||
+ if operator and not keys:
|
|
||||||
+ raise ValueError("Please provide param 'keys' when specified 'operator'!")
|
|
||||||
+
|
|
||||||
+ rule = ""
|
|
||||||
+ if labels:
|
|
||||||
+ pairs = ",".join([f"{n}='{v}'" for n, v in labels.items()])
|
|
||||||
+ rule = f"{{{pairs}}}"
|
|
||||||
+
|
|
||||||
+ group = ""
|
|
||||||
+ if isinstance(keys, list):
|
|
||||||
+ group = ",".join([k for k in keys])
|
|
||||||
+ elif isinstance(keys, str):
|
|
||||||
+ group = keys
|
|
||||||
+
|
|
||||||
+ if operator and value:
|
|
||||||
+ query = f"{operator}({value}, {metric}{rule}) by ({group})"
|
|
||||||
+ elif operator:
|
|
||||||
+ query = f"{operator}({metric}{rule}) by ({group})"
|
|
||||||
+ else:
|
|
||||||
+ query = f"{metric}{rule}"
|
|
||||||
+
|
|
||||||
+ return query
|
|
||||||
+
|
|
||||||
+
|
|
||||||
class MetricLoader:
|
|
||||||
"""
|
|
||||||
The metric loader that consumes raw data from PrometheusAdapter,
|
|
||||||
@@ -87,7 +124,7 @@ class MetricLoader:
|
|
||||||
|
|
||||||
:return List of TimeSeries
|
|
||||||
"""
|
|
||||||
- query = get_query(metric, **kwargs)
|
|
||||||
+ query = get_query2(metric, **kwargs)
|
|
||||||
time_series = self.provider.range_query(start, end, metric, query)
|
|
||||||
|
|
||||||
return time_series
|
|
||||||
@@ -109,7 +146,7 @@ class MetricLoader:
|
|
||||||
"""Gets unique labels of all metrics"""
|
|
||||||
unique_labels = set()
|
|
||||||
for metric in metrics:
|
|
||||||
- time_series = self.get_metric(start, end, metric, label_name=label_name)
|
|
||||||
+ time_series = self.get_metric(start, end, metric)
|
|
||||||
unique_labels.update([item.labels.get(label_name, "") for item in time_series])
|
|
||||||
|
|
||||||
return list([lbl for lbl in unique_labels if lbl])
|
|
||||||
diff --git a/anteater/utils/data_load.py b/anteater/utils/data_load.py
|
|
||||||
index 730c9c6..60c28e5 100644
|
|
||||||
--- a/anteater/utils/data_load.py
|
|
||||||
+++ b/anteater/utils/data_load.py
|
|
||||||
@@ -48,8 +48,8 @@ def load_job_config(file_name) -> JobConfig:
|
|
||||||
keywords = config['keywords']
|
|
||||||
root_cause_number = config['root_cause_number']
|
|
||||||
|
|
||||||
- kpis = [KPI(**update_description(_conf)) for _conf in config['KPI']]
|
|
||||||
- features = [Feature(**update_description(_conf)) for _conf in config['Features']]
|
|
||||||
+ kpis = [KPI.from_dict(**update_description(_conf)) for _conf in config['KPI']]
|
|
||||||
+ features = [Feature.from_dict(**update_description(_conf)) for _conf in config['Features']]
|
|
||||||
|
|
||||||
model_config = None
|
|
||||||
if 'OnlineModel' in config:
|
|
||||||
diff --git a/config/module/app_sli_rtt.json b/config/module/app_sli_rtt.json
|
|
||||||
index 0146883..5027b8d 100644
|
|
||||||
--- a/config/module/app_sli_rtt.json
|
|
||||||
+++ b/config/module/app_sli_rtt.json
|
|
||||||
@@ -10,13 +10,14 @@
|
|
||||||
"metric": "gala_gopher_sli_rtt_nsec",
|
|
||||||
"kpi_type": "rtt",
|
|
||||||
"entity_name": "sli",
|
|
||||||
- "enable": false,
|
|
||||||
+ "enable": true,
|
|
||||||
"description": "sli rtt 异常",
|
|
||||||
"description-zh": "应用级请求往返时延(RTT)异常",
|
|
||||||
"params": {
|
|
||||||
+ "method": "max",
|
|
||||||
"look_back": 10,
|
|
||||||
"obs_size": 25,
|
|
||||||
- "outlier_ratio_th": 0.3,
|
|
||||||
+ "outlier_ratio_th": 0.5,
|
|
||||||
"smooth_params": {
|
|
||||||
"method": "conv_smooth",
|
|
||||||
"box_pts": 3
|
|
||||||
@@ -31,6 +32,7 @@
|
|
||||||
"description": "sli tps 异常",
|
|
||||||
"description-zh": "应用级请求吞吐量(TPS)异常",
|
|
||||||
"params": {
|
|
||||||
+ "method": "min",
|
|
||||||
"look_back": 10,
|
|
||||||
"obs_size": 25,
|
|
||||||
"outlier_ratio_th": 0.3,
|
|
||||||
diff --git a/config/module/disk_throughput.json b/config/module/disk_throughput.json
|
|
||||||
index f6244f6..e3bcf68 100644
|
|
||||||
--- a/config/module/disk_throughput.json
|
|
||||||
+++ b/config/module/disk_throughput.json
|
|
||||||
@@ -14,9 +14,10 @@
|
|
||||||
"description": "Disk read await time is increasing!",
|
|
||||||
"description-zh": "磁盘读响应时间升高,性能发生劣化",
|
|
||||||
"params": {
|
|
||||||
+ "method": "max",
|
|
||||||
"look_back": 20,
|
|
||||||
"obs_size": 25,
|
|
||||||
- "outlier_ratio_th": 0.3,
|
|
||||||
+ "outlier_ratio_th": 0.5,
|
|
||||||
"smooth_params": {
|
|
||||||
"method": "conv_smooth",
|
|
||||||
"box_pts": 3
|
|
||||||
@@ -31,9 +32,10 @@
|
|
||||||
"description": "Disk write await time is increasing!",
|
|
||||||
"description-zh": "磁盘写响应时间升高,性能发生劣化",
|
|
||||||
"params": {
|
|
||||||
+ "method": "max",
|
|
||||||
"look_back": 20,
|
|
||||||
"obs_size": 25,
|
|
||||||
- "outlier_ratio_th": 0.3,
|
|
||||||
+ "outlier_ratio_th": 0.5,
|
|
||||||
"smooth_params": {
|
|
||||||
"method": "conv_smooth",
|
|
||||||
"box_pts": 3
|
|
||||||
diff --git a/config/module/proc_io_latency.json b/config/module/proc_io_latency.json
|
|
||||||
index f086b87..171c5f4 100644
|
|
||||||
--- a/config/module/proc_io_latency.json
|
|
||||||
+++ b/config/module/proc_io_latency.json
|
|
||||||
@@ -14,9 +14,10 @@
|
|
||||||
"description": "I/O operation delay at the BIO layer (unit: us)",
|
|
||||||
"description-zh": "BIO层I/O操作延时高(单位:us)",
|
|
||||||
"params": {
|
|
||||||
+ "method":"max",
|
|
||||||
"look_back": 20,
|
|
||||||
"obs_size": 37,
|
|
||||||
- "outlier_ratio_th": 0.4,
|
|
||||||
+ "outlier_ratio_th": 0.5,
|
|
||||||
"smooth_params": {
|
|
||||||
"method": "conv_smooth",
|
|
||||||
"box_pts": 3
|
|
||||||
@@ -31,9 +32,10 @@
|
|
||||||
"description": "Number of small I/O (less than 4 KB) read operations at the BIO layer.",
|
|
||||||
"description-zh": "BIO层小数据I/O读操作数量异常(小于4KB)",
|
|
||||||
"params": {
|
|
||||||
+ "method":"max",
|
|
||||||
"look_back": 20,
|
|
||||||
"obs_size": 25,
|
|
||||||
- "outlier_ratio_th": 0.3,
|
|
||||||
+ "outlier_ratio_th": 0.4,
|
|
||||||
"smooth_params": {
|
|
||||||
"method": "conv_smooth",
|
|
||||||
"box_pts": 3
|
|
||||||
@@ -48,9 +50,10 @@
|
|
||||||
"description": "Number of small I/O (less than 4 KB) write operations at the BIO layer.",
|
|
||||||
"description-zh": "BIO层小数据I/O写操作数量异常(小于4KB)",
|
|
||||||
"params": {
|
|
||||||
+ "method":"max",
|
|
||||||
"look_back": 20,
|
|
||||||
"obs_size": 25,
|
|
||||||
- "outlier_ratio_th": 0.3,
|
|
||||||
+ "outlier_ratio_th": 0.4,
|
|
||||||
"smooth_params": {
|
|
||||||
"method": "savgol_smooth",
|
|
||||||
"window_length": 13,
|
|
||||||
@@ -66,9 +69,10 @@
|
|
||||||
"description": "Number of big I/O (greater than 4 KB) read operations at the BIO layer.",
|
|
||||||
"description-zh": "BIO层大数据I/O读操作数量异常(大于4KB)",
|
|
||||||
"params": {
|
|
||||||
+ "method":"max",
|
|
||||||
"look_back": 20,
|
|
||||||
"obs_size": 25,
|
|
||||||
- "outlier_ratio_th": 0.3,
|
|
||||||
+ "outlier_ratio_th": 0.4,
|
|
||||||
"smooth_params": {
|
|
||||||
"method": "conv_smooth",
|
|
||||||
"box_pts": 3
|
|
||||||
@@ -83,9 +87,10 @@
|
|
||||||
"description": "Number of big I/O (greater than 4 KB) write operations at the BIO layer.",
|
|
||||||
"description-zh": "BIO层大数据写操作数量异常(大于4KB)",
|
|
||||||
"params": {
|
|
||||||
+ "method":"max",
|
|
||||||
"look_back": 20,
|
|
||||||
"obs_size": 25,
|
|
||||||
- "outlier_ratio_th": 0.3,
|
|
||||||
+ "outlier_ratio_th": 0.4,
|
|
||||||
"smooth_params": {
|
|
||||||
"method": "conv_smooth",
|
|
||||||
"box_pts": 3
|
|
||||||
diff --git a/config/module/sys_io_latency.json b/config/module/sys_io_latency.json
|
|
||||||
index bdf17d3..3fa1266 100644
|
|
||||||
--- a/config/module/sys_io_latency.json
|
|
||||||
+++ b/config/module/sys_io_latency.json
|
|
||||||
@@ -16,7 +16,7 @@
|
|
||||||
"params": {
|
|
||||||
"look_back": 20,
|
|
||||||
"obs_size": 25,
|
|
||||||
- "outlier_ratio_th": 0.3,
|
|
||||||
+ "outlier_ratio_th": 0.4,
|
|
||||||
"smooth_params": {
|
|
||||||
"method": "conv_smooth",
|
|
||||||
"box_pts": 3
|
|
||||||
diff --git a/config/module/sys_tcp_establish.json b/config/module/sys_tcp_establish.json
|
|
||||||
index 7cd2369..9bd2a46 100644
|
|
||||||
--- a/config/module/sys_tcp_establish.json
|
|
||||||
+++ b/config/module/sys_tcp_establish.json
|
|
||||||
@@ -17,7 +17,7 @@
|
|
||||||
"look_back": 30,
|
|
||||||
"outlier_ratio_th": 0.5,
|
|
||||||
"obs_size": 3,
|
|
||||||
- "min_rtt": 500000
|
|
||||||
+ "min_rtt": 100000
|
|
||||||
}
|
|
||||||
}
|
|
||||||
],
|
|
||||||
diff --git a/config/module/sys_tcp_transmission_latency.json b/config/module/sys_tcp_transmission_latency.json
|
|
||||||
index 0527487..3ba8113 100644
|
|
||||||
--- a/config/module/sys_tcp_transmission_latency.json
|
|
||||||
+++ b/config/module/sys_tcp_transmission_latency.json
|
|
||||||
@@ -14,10 +14,12 @@
|
|
||||||
"description": "Smoothed Round Trip Time(us)",
|
|
||||||
"description-zh": "TCP链接往返时延异常,性能劣化",
|
|
||||||
"params": {
|
|
||||||
+ "method": "max",
|
|
||||||
"look_back": 20,
|
|
||||||
"obs_size": 25,
|
|
||||||
"n": 3,
|
|
||||||
- "outlier_ratio_th": 0.4,
|
|
||||||
+ "min_srtt": 20000,
|
|
||||||
+ "outlier_ratio_th": 0.6,
|
|
||||||
"smooth_params": {
|
|
||||||
"method": "conv_smooth",
|
|
||||||
"box_pts": 3
|
|
||||||
--
|
|
||||||
2.33.0
|
|
||||||
|
|
||||||
Binary file not shown.
BIN
gala-anteater-1.1.0.tar.gz
Normal file
BIN
gala-anteater-1.1.0.tar.gz
Normal file
Binary file not shown.
@ -1,8 +1,8 @@
|
|||||||
%define debug_package %{nil}
|
%define debug_package %{nil}
|
||||||
|
|
||||||
Name: gala-anteater
|
Name: gala-anteater
|
||||||
Version: 1.0.1
|
Version: 1.1.0
|
||||||
Release: 4
|
Release: 1
|
||||||
Summary: A time-series anomaly detection platform for operating system.
|
Summary: A time-series anomaly detection platform for operating system.
|
||||||
License: MulanPSL2
|
License: MulanPSL2
|
||||||
URL: https://gitee.com/openeuler/gala-anteater
|
URL: https://gitee.com/openeuler/gala-anteater
|
||||||
@ -11,13 +11,6 @@ BuildRoot: %{_builddir}/%{name}-%{version}
|
|||||||
BuildRequires: procps-ng python3-setuptools
|
BuildRequires: procps-ng python3-setuptools
|
||||||
Requires: python3-gala-anteater = %{version}-%{release}
|
Requires: python3-gala-anteater = %{version}-%{release}
|
||||||
|
|
||||||
Patch1: Add-disk-throughput-detector.patch
|
|
||||||
Patch2: Update-TCP-Establish-Model-Add-Nic-Loss-Detector.patch
|
|
||||||
Patch3: add-chinese-descriptions.patch
|
|
||||||
Patch4: remove-sys-level-config-param.patch
|
|
||||||
Patch5: add-systemd-service-for-anteater.patch
|
|
||||||
Patch6: fix-str2enum-bug-data-query-refactor.patch
|
|
||||||
|
|
||||||
%description
|
%description
|
||||||
Abnormal detection module for A-Ops project
|
Abnormal detection module for A-Ops project
|
||||||
|
|
||||||
@ -59,16 +52,18 @@ fi
|
|||||||
%doc README.md
|
%doc README.md
|
||||||
%license LICENSE
|
%license LICENSE
|
||||||
%{_bindir}/gala-anteater
|
%{_bindir}/gala-anteater
|
||||||
|
%config(noreplace) %{_sysconfdir}/%{name}/config/metricinfo.json
|
||||||
%config(noreplace) %{_sysconfdir}/%{name}/config/gala-anteater.yaml
|
%config(noreplace) %{_sysconfdir}/%{name}/config/gala-anteater.yaml
|
||||||
%config(noreplace) %{_sysconfdir}/%{name}/config/log.settings.ini
|
%config(noreplace) %{_sysconfdir}/%{name}/config/log.settings.ini
|
||||||
%config(noreplace) %{_sysconfdir}/%{name}/config/module/app_sli_rtt.json
|
%config(noreplace) %{_sysconfdir}/%{name}/module/app_sli_rtt.job.json
|
||||||
%config(noreplace) %{_sysconfdir}/%{name}/config/module/proc_io_latency.json
|
%config(noreplace) %{_sysconfdir}/%{name}/module/disk_throughput.job.json
|
||||||
%config(noreplace) %{_sysconfdir}/%{name}/config/module/sys_io_latency.json
|
%config(noreplace) %{_sysconfdir}/%{name}/module/jvm_oom.job.json
|
||||||
%config(noreplace) %{_sysconfdir}/%{name}/config/module/sys_tcp_establish.json
|
%config(noreplace) %{_sysconfdir}/%{name}/module/proc_io_latency.job.json
|
||||||
%config(noreplace) %{_sysconfdir}/%{name}/config/module/sys_tcp_transmission_latency.json
|
%config(noreplace) %{_sysconfdir}/%{name}/module/sys_io_latency.job.json
|
||||||
%config(noreplace) %{_sysconfdir}/%{name}/config/module/sys_tcp_transmission_throughput.json
|
%config(noreplace) %{_sysconfdir}/%{name}/module/sys_nic_loss.job.json
|
||||||
%config(noreplace) %{_sysconfdir}/%{name}/config/module/disk_throughput.json
|
%config(noreplace) %{_sysconfdir}/%{name}/module/sys_tcp_establish.job.json
|
||||||
%config(noreplace) %{_sysconfdir}/%{name}/config/module/sys_nic_loss.json
|
%config(noreplace) %{_sysconfdir}/%{name}/module/sys_tcp_transmission_latency.job.json
|
||||||
|
%config(noreplace) %{_sysconfdir}/%{name}/module/usad_model.job.json
|
||||||
/usr/lib/systemd/system/gala-anteater.service
|
/usr/lib/systemd/system/gala-anteater.service
|
||||||
|
|
||||||
%files -n python3-gala-anteater
|
%files -n python3-gala-anteater
|
||||||
@ -77,6 +72,9 @@ fi
|
|||||||
|
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Thu Aug 31 2023 Li Zhenxing <lizhenxing11@huawei.com> - 1.1.0-1
|
||||||
|
- Upgrade anteater version to 1.1.0
|
||||||
|
|
||||||
* Fri Jan 20 2023 Zhen Chen <chenzhen126@huawei.com> - 1.0.1-4
|
* Fri Jan 20 2023 Zhen Chen <chenzhen126@huawei.com> - 1.0.1-4
|
||||||
- eliminate 'Fail to try-restart' warning when downgrading to 1.0.1-1
|
- eliminate 'Fail to try-restart' warning when downgrading to 1.0.1-1
|
||||||
|
|
||||||
|
|||||||
@ -1,98 +0,0 @@
|
|||||||
From 5c6b03a49a49ddc56574e906f959d5fe34c1debc Mon Sep 17 00:00:00 2001
|
|
||||||
From: lizhenxing11 <lizhenxing11@huawei.com>
|
|
||||||
Date: Fri, 6 Jan 2023 10:59:12 +0800
|
|
||||||
Subject: [PATCH] remove 'sys-level' config param
|
|
||||||
|
|
||||||
---
|
|
||||||
anteater/config.py | 1 -
|
|
||||||
anteater/main.py | 29 +++++++++++------------------
|
|
||||||
config/gala-anteater.yaml | 1 -
|
|
||||||
docs/conf_introduction.md | 1 -
|
|
||||||
4 files changed, 11 insertions(+), 21 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/anteater/config.py b/anteater/config.py
|
|
||||||
index e9ab557..caeceec 100644
|
|
||||||
--- a/anteater/config.py
|
|
||||||
+++ b/anteater/config.py
|
|
||||||
@@ -27,7 +27,6 @@ import yaml
|
|
||||||
class GlobalConf:
|
|
||||||
"""The global config"""
|
|
||||||
data_source: str
|
|
||||||
- sys_level: bool
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
diff --git a/anteater/main.py b/anteater/main.py
|
|
||||||
index 4de72f9..87aae95 100644
|
|
||||||
--- a/anteater/main.py
|
|
||||||
+++ b/anteater/main.py
|
|
||||||
@@ -26,7 +26,6 @@ from anteater.module.sys.nic_loss import NICLossDetector
|
|
||||||
from anteater.module.sys.proc_io_latency import ProcIOLatencyDetector
|
|
||||||
from anteater.module.sys.sys_io_latency import SysIOLatencyDetector
|
|
||||||
from anteater.module.sys.tcp_establish import SysTcpEstablishDetector
|
|
||||||
-from anteater.module.sys.tcp_transmission_throughput import SysTcpTransmissionThroughputDetector
|
|
||||||
from anteater.module.sys.tcp_transmission_latency import SysTcpTransmissionLatencyDetector
|
|
||||||
from anteater.provider.kafka import KafkaProvider
|
|
||||||
from anteater.source.anomaly_report import AnomalyReport
|
|
||||||
@@ -49,24 +48,18 @@ def main():
|
|
||||||
kafka_provider = KafkaProvider(conf.kafka)
|
|
||||||
loader = MetricLoader(conf)
|
|
||||||
report = AnomalyReport(kafka_provider)
|
|
||||||
- if conf.global_conf.sys_level:
|
|
||||||
- detectors = [
|
|
||||||
- # APP sli anomaly detection
|
|
||||||
- APPSliDetector(loader, report),
|
|
||||||
+ detectors = [
|
|
||||||
+ # APP sli anomaly detection
|
|
||||||
+ APPSliDetector(loader, report),
|
|
||||||
|
|
||||||
- # SYS tcp/io detection
|
|
||||||
- SysTcpEstablishDetector(loader, report),
|
|
||||||
- SysTcpTransmissionLatencyDetector(loader, report),
|
|
||||||
- SysIOLatencyDetector(loader, report),
|
|
||||||
- ProcIOLatencyDetector(loader, report),
|
|
||||||
- DiskThroughputDetector(loader, report),
|
|
||||||
- NICLossDetector(loader, report),
|
|
||||||
- ]
|
|
||||||
- else:
|
|
||||||
- detectors = [
|
|
||||||
- # APP sli anomaly detection
|
|
||||||
- APPSliDetector(loader, report)
|
|
||||||
- ]
|
|
||||||
+ # SYS tcp/io detection
|
|
||||||
+ SysTcpEstablishDetector(loader, report),
|
|
||||||
+ SysTcpTransmissionLatencyDetector(loader, report),
|
|
||||||
+ SysIOLatencyDetector(loader, report),
|
|
||||||
+ ProcIOLatencyDetector(loader, report),
|
|
||||||
+ DiskThroughputDetector(loader, report),
|
|
||||||
+ NICLossDetector(loader, report),
|
|
||||||
+ ]
|
|
||||||
|
|
||||||
anomaly_detect = AnomalyDetection(detectors, conf)
|
|
||||||
|
|
||||||
diff --git a/config/gala-anteater.yaml b/config/gala-anteater.yaml
|
|
||||||
index c4c54a0..72ffc31 100644
|
|
||||||
--- a/config/gala-anteater.yaml
|
|
||||||
+++ b/config/gala-anteater.yaml
|
|
||||||
@@ -1,6 +1,5 @@
|
|
||||||
Global:
|
|
||||||
data_source: "prometheus"
|
|
||||||
- sys_level: false
|
|
||||||
|
|
||||||
Kafka:
|
|
||||||
server: "localhost"
|
|
||||||
diff --git a/docs/conf_introduction.md b/docs/conf_introduction.md
|
|
||||||
index 09a7284..869d3e9 100644
|
|
||||||
--- a/docs/conf_introduction.md
|
|
||||||
+++ b/docs/conf_introduction.md
|
|
||||||
@@ -16,7 +16,6 @@ gala-anteater # gala-anteater 主目录
|
|
||||||
在文件`gala-anteater.yaml`中,配置`gala-anteater`启动时所需的参数。该配置项中,主要包含:
|
|
||||||
- Global: 配置启动时的全局变量
|
|
||||||
- data_source: 时序数据的来源,目前支持`"prometheus"`(Prometheus)和`"aom"`(AOM)两种数据来源;
|
|
||||||
- - sys_level: 是否支持`系统级`异常检测,可选:`true`、`false`。
|
|
||||||
|
|
||||||
- Kafka: 配置中间件Kafka所需的参数
|
|
||||||
- server: Kafka对应的`server ip`,如:"10.xxx.xxx.xxx";
|
|
||||||
--
|
|
||||||
2.33.0
|
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user