sync code from 22.03-lts-sp1
This commit is contained in:
parent
7ecc111b8d
commit
f3d5c5a857
88
0001-optimize-cause-location-with-time-delay.patch
Normal file
88
0001-optimize-cause-location-with-time-delay.patch
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
From be8f48eed633d99aaf9eadd25d7562391d0807b9 Mon Sep 17 00:00:00 2001
|
||||||
|
From: algorithmofdish <hexiujun1@huawei.com>
|
||||||
|
Date: Wed, 14 Dec 2022 15:30:06 +0800
|
||||||
|
Subject: [PATCH] perf(infer): optimize cause location with time delay
|
||||||
|
|
||||||
|
---
|
||||||
|
cause_inference/abnormal_event.py | 8 ++++++++
|
||||||
|
cause_inference/cause_infer.py | 11 ++++++++---
|
||||||
|
2 files changed, 16 insertions(+), 3 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/cause_inference/abnormal_event.py b/cause_inference/abnormal_event.py
|
||||||
|
index f55c3d0..599d72d 100644
|
||||||
|
--- a/cause_inference/abnormal_event.py
|
||||||
|
+++ b/cause_inference/abnormal_event.py
|
||||||
|
@@ -2,6 +2,7 @@ import json
|
||||||
|
from enum import Enum
|
||||||
|
from queue import Queue, Empty
|
||||||
|
from typing import List
|
||||||
|
+import time
|
||||||
|
|
||||||
|
from kafka import KafkaConsumer
|
||||||
|
|
||||||
|
@@ -37,6 +38,7 @@ class AbnEvtMgt:
|
||||||
|
except Empty as ex:
|
||||||
|
raise NoKpiEventException from ex
|
||||||
|
|
||||||
|
+ self.wait_future_evts(abn_kpi.timestamp)
|
||||||
|
self.consume_kpi_evts_with_deadline(abn_kpi.timestamp)
|
||||||
|
self.consume_metric_evts_with_deadline(abn_kpi.timestamp)
|
||||||
|
self.clear_aging_evts(abn_kpi.timestamp)
|
||||||
|
@@ -145,6 +147,12 @@ class AbnEvtMgt:
|
||||||
|
def is_future(self, evt_ts, cur_ts):
|
||||||
|
return evt_ts > cur_ts + self.future_duration
|
||||||
|
|
||||||
|
+ def wait_future_evts(self, evt_ts):
|
||||||
|
+ cur_ts = int(time.time()) * 1000
|
||||||
|
+ if evt_ts <= cur_ts < evt_ts + self.future_duration:
|
||||||
|
+ wait_sec = (evt_ts + self.future_duration - cur_ts) // 1000
|
||||||
|
+ time.sleep(wait_sec)
|
||||||
|
+
|
||||||
|
|
||||||
|
def preprocess_abn_score(score):
|
||||||
|
return max(0, score)
|
||||||
|
diff --git a/cause_inference/cause_infer.py b/cause_inference/cause_infer.py
|
||||||
|
index 82a83e1..b22768f 100644
|
||||||
|
--- a/cause_inference/cause_infer.py
|
||||||
|
+++ b/cause_inference/cause_infer.py
|
||||||
|
@@ -58,10 +58,14 @@ class CauseLocator:
|
||||||
|
@staticmethod
|
||||||
|
def filter_causes(causes: List[Cause]) -> List[Cause]:
|
||||||
|
res = []
|
||||||
|
+ dup = set()
|
||||||
|
for cause in causes:
|
||||||
|
filtered_cause = CauseLocator.clear_virtual_cause(cause)
|
||||||
|
if filtered_cause is not None:
|
||||||
|
- res.append(filtered_cause)
|
||||||
|
+ key = (filtered_cause.metric_id, filtered_cause.entity_id)
|
||||||
|
+ if key not in dup:
|
||||||
|
+ dup.add(key)
|
||||||
|
+ res.append(filtered_cause)
|
||||||
|
return res
|
||||||
|
|
||||||
|
def construct_causal_graph(self, entity_causal_relations: List[tuple], abn_metrics: List[AbnormalEvent],
|
||||||
|
@@ -106,10 +110,11 @@ class CauseLocator:
|
||||||
|
self.topo_ts = self.topo_db_mgt.query_recent_topo_ts(self.abn_kpi.timestamp // 1000)
|
||||||
|
|
||||||
|
def calc_corr_score(self, causal_graph: CausalGraph):
|
||||||
|
+ end_ts = self.abn_kpi.timestamp // 1000 + infer_config.infer_conf.get('evt_future_duration')
|
||||||
|
if not self.abn_kpi.hist_data:
|
||||||
|
hist_data = self.metric_db_mgt.query_metric_hist_data(self.abn_kpi.abnormal_metric_id,
|
||||||
|
self.abn_kpi.metric_labels,
|
||||||
|
- self.topo_ts)
|
||||||
|
+ end_ts)
|
||||||
|
self.abn_kpi.set_hist_data(hist_data)
|
||||||
|
|
||||||
|
for node_id, node_attrs in causal_graph.entity_cause_graph.nodes.items():
|
||||||
|
@@ -120,7 +125,7 @@ class CauseLocator:
|
||||||
|
|
||||||
|
abn_metrics = causal_graph.get_abnormal_metrics(node_id)
|
||||||
|
for metric_id, metric_attrs in abn_metrics.items():
|
||||||
|
- metric_hist_data = self.metric_db_mgt.query_metric_hist_data(metric_id, metric_labels, self.topo_ts)
|
||||||
|
+ metric_hist_data = self.metric_db_mgt.query_metric_hist_data(metric_id, metric_labels, end_ts)
|
||||||
|
|
||||||
|
data_trend = trend(metric_hist_data)
|
||||||
|
metric_attrs.setdefault('real_trend', data_trend)
|
||||||
|
--
|
||||||
|
2.21.0.windows.1
|
||||||
|
|
||||||
82
0002-optimize-infer-rule-config.patch
Normal file
82
0002-optimize-infer-rule-config.patch
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
From aeec448c24f3d724a8b79a9732091d8a833cedc5 Mon Sep 17 00:00:00 2001
|
||||||
|
From: algorithmofdish <hexiujun1@huawei.com>
|
||||||
|
Date: Fri, 16 Dec 2022 17:04:51 +0800
|
||||||
|
Subject: [PATCH] refactor(infer): optimize infer rule config
|
||||||
|
|
||||||
|
---
|
||||||
|
README.md | 2 +-
|
||||||
|
cause_inference/causal_graph.py | 7 +++++++
|
||||||
|
cause_inference/output.py | 2 +-
|
||||||
|
config/infer-rule.yaml | 5 +----
|
||||||
|
4 files changed, 10 insertions(+), 6 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/README.md b/README.md
|
||||||
|
index 91d8afe..422ae86 100644
|
||||||
|
--- a/README.md
|
||||||
|
+++ b/README.md
|
||||||
|
@@ -198,7 +198,7 @@ gala-spider 项目提供了两个功能模块,它们分别是:
|
||||||
|
- prometheus_server :指定 Prometheus 服务器地址
|
||||||
|
- arangodb_server :指定 arangodb 服务器地址
|
||||||
|
- kafka_server :指定 kafka 服务器地址
|
||||||
|
- - log_level :指定 gala-spider 日志打印级别
|
||||||
|
+ - log_level :指定 gala-inference 日志打印级别
|
||||||
|
|
||||||
|
此外,如果需要从宿主机的配置文件中启动容器,可通过挂载卷的方式执行:
|
||||||
|
|
||||||
|
diff --git a/cause_inference/causal_graph.py b/cause_inference/causal_graph.py
|
||||||
|
index f429bd6..10ad3ba 100644
|
||||||
|
--- a/cause_inference/causal_graph.py
|
||||||
|
+++ b/cause_inference/causal_graph.py
|
||||||
|
@@ -67,6 +67,13 @@ class CausalGraph:
|
||||||
|
else:
|
||||||
|
abn_metrics[abn_metric.abnormal_metric_id] = abn_metric.to_dict()
|
||||||
|
|
||||||
|
+ metric_labels = abn_metrics[abn_metric.abnormal_metric_id].get('metric_labels')
|
||||||
|
+ if not metric_labels:
|
||||||
|
+ metric_labels = dict(node_attrs.get('raw_data', {}))
|
||||||
|
+ if 'metrics' in metric_labels:
|
||||||
|
+ metric_labels.pop('metrics')
|
||||||
|
+ abn_metrics[abn_metric.abnormal_metric_id].update({'metric_labels': metric_labels})
|
||||||
|
+
|
||||||
|
def get_abnormal_metrics(self, node_id) -> dict:
|
||||||
|
return self.entity_cause_graph.nodes[node_id].get('abnormal_metrics', {})
|
||||||
|
|
||||||
|
diff --git a/cause_inference/output.py b/cause_inference/output.py
|
||||||
|
index 983b10c..51b9a54 100644
|
||||||
|
--- a/cause_inference/output.py
|
||||||
|
+++ b/cause_inference/output.py
|
||||||
|
@@ -39,7 +39,7 @@ def format_cause_metrics(causes: List[Cause]):
|
||||||
|
'metric_labels': node_attrs.get('metric_labels', {}),
|
||||||
|
'timestamp': node_attrs.get('timestamp'),
|
||||||
|
'desc': node_attrs.get('desc'),
|
||||||
|
- 'score': node_attrs.get('corr_score', 0.0),
|
||||||
|
+ 'score': cause.cause_score,
|
||||||
|
'keyword': cause_keyword_mgt.get_keyword_of_entity(node_attrs.get('entity_type')),
|
||||||
|
}
|
||||||
|
path = []
|
||||||
|
diff --git a/config/infer-rule.yaml b/config/infer-rule.yaml
|
||||||
|
index e88db7b..d5d1b51 100644
|
||||||
|
--- a/config/infer-rule.yaml
|
||||||
|
+++ b/config/infer-rule.yaml
|
||||||
|
@@ -10,10 +10,6 @@ metric_categories:
|
||||||
|
metrics:
|
||||||
|
- gala_gopher_proc_read_bytes
|
||||||
|
- gala_gopher_proc_write_bytes
|
||||||
|
- - gala_gopher_proc_less_4k_io_read
|
||||||
|
- - gala_gopher_proc_less_4k_io_write
|
||||||
|
- - gala_gopher_proc_greater_4k_io_read
|
||||||
|
- - gala_gopher_proc_greater_4k_io_write
|
||||||
|
trend: rise
|
||||||
|
-
|
||||||
|
category: PROC_IO_DELAY
|
||||||
|
@@ -49,6 +45,7 @@ metric_categories:
|
||||||
|
- gala_gopher_block_latency_req_last
|
||||||
|
- gala_gopher_block_latency_req_sum
|
||||||
|
- gala_gopher_block_latency_req_jitter
|
||||||
|
+ - gala_gopher_block_count_latency_req
|
||||||
|
nic:
|
||||||
|
-
|
||||||
|
category: NIC_DROP
|
||||||
|
--
|
||||||
|
2.21.0.windows.1
|
||||||
|
|
||||||
Binary file not shown.
BIN
gala-spider-1.0.1.tar.gz
Normal file
BIN
gala-spider-1.0.1.tar.gz
Normal file
Binary file not shown.
@ -1,8 +1,8 @@
|
|||||||
%define debug_package %{nil}
|
%define debug_package %{nil}
|
||||||
|
|
||||||
Name: gala-spider
|
Name: gala-spider
|
||||||
Version: 1.0.0
|
Version: 1.0.1
|
||||||
Release: 2
|
Release: 3
|
||||||
Summary: OS topological graph storage service and cause inference service for gala-ops project
|
Summary: OS topological graph storage service and cause inference service for gala-ops project
|
||||||
License: MulanPSL2
|
License: MulanPSL2
|
||||||
URL: https://gitee.com/openeuler/gala-spider
|
URL: https://gitee.com/openeuler/gala-spider
|
||||||
@ -11,10 +11,19 @@ Source0: %{name}-%{version}.tar.gz
|
|||||||
BuildRequires: python3-setuptools systemd
|
BuildRequires: python3-setuptools systemd
|
||||||
Requires: python3-%{name} = %{version}-%{release}
|
Requires: python3-%{name} = %{version}-%{release}
|
||||||
|
|
||||||
|
patch0: 0001-optimize-cause-location-with-time-delay.patch
|
||||||
|
patch1: 0002-optimize-infer-rule-config.patch
|
||||||
|
|
||||||
|
|
||||||
%description
|
%description
|
||||||
OS topological graph storage service for gala-ops project
|
OS topological graph storage service for gala-ops project
|
||||||
|
|
||||||
|
%package -n gala-ops
|
||||||
|
Summary: gala-anteater/spider/inference installation package
|
||||||
|
Requires: gala-anteater gala-inference gala-spider
|
||||||
|
|
||||||
|
%description -n gala-ops
|
||||||
|
This package requires gala-anteater/spider/inference, allowing users to install them all at once
|
||||||
|
|
||||||
%package -n python3-%{name}
|
%package -n python3-%{name}
|
||||||
Summary: Python3 package of gala-spider
|
Summary: Python3 package of gala-spider
|
||||||
@ -41,7 +50,7 @@ Python3 package of gala-inference
|
|||||||
|
|
||||||
|
|
||||||
%prep
|
%prep
|
||||||
%autosetup
|
%autosetup -p1
|
||||||
|
|
||||||
|
|
||||||
%build
|
%build
|
||||||
@ -64,7 +73,7 @@ fi
|
|||||||
%systemd_preun gala-spider.service
|
%systemd_preun gala-spider.service
|
||||||
|
|
||||||
%postun
|
%postun
|
||||||
%systemd_postun gala-spider.service
|
%systemd_postun_with_restart gala-spider.service
|
||||||
|
|
||||||
|
|
||||||
%pre -n gala-inference
|
%pre -n gala-inference
|
||||||
@ -79,7 +88,7 @@ fi
|
|||||||
%systemd_preun gala-inference.service
|
%systemd_preun gala-inference.service
|
||||||
|
|
||||||
%postun -n gala-inference
|
%postun -n gala-inference
|
||||||
%systemd_postun gala-inference.service
|
%systemd_postun_with_restart gala-inference.service
|
||||||
|
|
||||||
|
|
||||||
%files
|
%files
|
||||||
@ -91,6 +100,8 @@ fi
|
|||||||
%{_bindir}/spider-storage
|
%{_bindir}/spider-storage
|
||||||
%{_unitdir}/gala-spider.service
|
%{_unitdir}/gala-spider.service
|
||||||
|
|
||||||
|
%files -n gala-ops
|
||||||
|
%defattr(-,root,root)
|
||||||
|
|
||||||
%files -n python3-%{name}
|
%files -n python3-%{name}
|
||||||
%{python3_sitelib}/spider/*
|
%{python3_sitelib}/spider/*
|
||||||
@ -103,6 +114,7 @@ fi
|
|||||||
%config(noreplace) %{_sysconfdir}/gala-inference/gala-inference.yaml
|
%config(noreplace) %{_sysconfdir}/gala-inference/gala-inference.yaml
|
||||||
%config(noreplace) %{_sysconfdir}/gala-inference/ext-observe-meta.yaml
|
%config(noreplace) %{_sysconfdir}/gala-inference/ext-observe-meta.yaml
|
||||||
%config(noreplace) %{_sysconfdir}/gala-inference/infer-rule.yaml
|
%config(noreplace) %{_sysconfdir}/gala-inference/infer-rule.yaml
|
||||||
|
%config(noreplace) %{_sysconfdir}/gala-inference/cause-keyword.yaml
|
||||||
%{_bindir}/gala-inference
|
%{_bindir}/gala-inference
|
||||||
%{_unitdir}/gala-inference.service
|
%{_unitdir}/gala-inference.service
|
||||||
|
|
||||||
@ -113,6 +125,27 @@ fi
|
|||||||
|
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Wed Dec 21 2022 algorithmofdish <hexiujun1@huawei.com> - 1.0.1-3
|
||||||
|
- Optimize infer rule config
|
||||||
|
|
||||||
|
* Thu Dec 15 2022 algorithmofdish <hexiujun1@huawei.com> - 1.0.1-2
|
||||||
|
- Optimize cause location with time delay
|
||||||
|
|
||||||
|
* Wed Dec 14 2022 algorithmofdish <hexiujun1@huawei.com> - 1.0.1-1
|
||||||
|
- Update to 1.0.1: support cross host cause location
|
||||||
|
|
||||||
|
* Sat Dec 10 2022 algorithmofdish <hexiujun1@huawei.com> - 1.0.0-6
|
||||||
|
- Adaptation for abnormal event output change
|
||||||
|
|
||||||
|
* Tue Dec 6 2022 Zhen Chen <chenzhen126@huawei.com> - 1.0.0-5
|
||||||
|
- Provide gala-ops package to install anteater/spider/inference at once
|
||||||
|
|
||||||
|
* Mon Dec 5 2022 tangxin xie <xietangxin@huawei.com> - 1.0.0-4
|
||||||
|
- Restart service when the software is updated
|
||||||
|
|
||||||
|
* Tue Nov 22 2022 algorithmofdish <hexiujun1@huawei.com> - 1.0.0-3
|
||||||
|
- Cause inference optimization
|
||||||
|
|
||||||
* Mon Nov 14 2022 Zhen Chen <chenzhen126@huawei.com> - 1.0.0-2
|
* Mon Nov 14 2022 Zhen Chen <chenzhen126@huawei.com> - 1.0.0-2
|
||||||
- Update 1.0.0 tarball
|
- Update 1.0.0 tarball
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user