!49 [sync] PR-48: Refactor: fencer: sleep 1s between reconnects

From: @openeuler-sync-bot 
Reviewed-by: @jxy_git 
Signed-off-by: @jxy_git
This commit is contained in:
openeuler-ci-bot 2023-08-31 09:55:36 +00:00 committed by Gitee
commit 98a5b23c19
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
2 changed files with 190 additions and 2 deletions

View File

@ -0,0 +1,185 @@
From 55b5b78c69089fd418c590eb265eef2f7b82d689 Mon Sep 17 00:00:00 2001
From: bizhiyuan <bizhiyuan@kylinos.cn>
Date: Thu, 31 Aug 2023 00:50:44 +0800
Subject: [PATCH] Refactor: fencer: sleep 1s between reconnects
---
daemons/controld/controld_control.c | 5 ++-
daemons/controld/controld_fencing.c | 65 ++++++++++++++---------------
daemons/controld/controld_fencing.h | 2 +-
3 files changed, 36 insertions(+), 36 deletions(-)
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
index ffc62a0..48efdd5 100644
--- a/daemons/controld/controld_control.c
+++ b/daemons/controld/controld_control.c
@@ -504,8 +504,9 @@ do_started(long long action,
} else {
crm_notice("Pacemaker controller successfully started and accepting connections");
}
- controld_trigger_fencer_connect();
-
+ controld_set_fsa_input_flags(R_ST_REQUIRED);
+ controld_timer_fencer_connect(GINT_TO_POINTER(TRUE));
+
controld_clear_fsa_input_flags(R_STARTING);
register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
}
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index 89cb61f..8f571b0 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -391,7 +391,7 @@ execute_stonith_cleanup(void)
*/
static stonith_t *stonith_api = NULL;
-static crm_trigger_t *stonith_reconnect = NULL;
+static mainloop_timer_t *controld_fencer_connect_timer = NULL;
static char *te_client_id = NULL;
static gboolean
@@ -448,8 +448,9 @@ tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) {
crm_crit("Fencing daemon connection failed");
- mainloop_set_trigger(stonith_reconnect);
-
+ if (!mainloop_timer_running(controld_fencer_connect_timer)) {
+ mainloop_timer_start(controld_fencer_connect_timer);
+ }
} else {
crm_info("Fencing daemon disconnected");
}
@@ -647,14 +648,14 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event)
/*!
* \brief Connect to fencer
*
- * \param[in] user_data If NULL, retry failures now, otherwise retry in main loop
+ * \param[in] user_data If NULL, retry failures now; otherwise retry from a mainloop timer
*
- * \return TRUE
+ * \return G_SOURCE_REMOVE if no timer retry is needed (connected, or API allocation failed), G_SOURCE_CONTINUE if a non-blocking attempt failed
* \note If user_data is NULL, this will wait 2s between attempts, for up to
* 30 attempts, meaning the controller could be blocked as long as 58s.
*/
-static gboolean
-te_connect_stonith(gpointer user_data)
+gboolean
+controld_timer_fencer_connect(gpointer user_data)
{
int rc = pcmk_ok;
@@ -662,13 +663,13 @@ te_connect_stonith(gpointer user_data)
stonith_api = stonith_api_new();
if (stonith_api == NULL) {
crm_err("Could not connect to fencer: API memory allocation failed");
- return TRUE;
+ return G_SOURCE_REMOVE;
}
}
if (stonith_api->state != stonith_disconnected) {
crm_trace("Already connected to fencer, no need to retry");
- return TRUE;
+ return G_SOURCE_REMOVE;
}
if (user_data == NULL) {
@@ -681,17 +682,31 @@ te_connect_stonith(gpointer user_data)
} else {
// Non-blocking (retry failures later in main loop)
rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
+
+
+ if (controld_fencer_connect_timer == NULL) {
+ controld_fencer_connect_timer =
+ mainloop_timer_add("controld_fencer_connect", 1000,
+ TRUE, controld_timer_fencer_connect,
+ GINT_TO_POINTER(TRUE));
+ }
+
if (rc != pcmk_ok) {
if (pcmk_is_set(controld_globals.fsa_input_register,
R_ST_REQUIRED)) {
crm_notice("Fencer connection failed (will retry): %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
- mainloop_set_trigger(stonith_reconnect);
- } else {
+
+ if (!mainloop_timer_running(controld_fencer_connect_timer)) {
+ mainloop_timer_start(controld_fencer_connect_timer);
+ }
+
+ return G_SOURCE_CONTINUE;
+ } else {
crm_info("Fencer connection failed (ignoring because no longer required): %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
}
- return TRUE;
+ return G_SOURCE_CONTINUE;
}
}
@@ -709,23 +724,7 @@ te_connect_stonith(gpointer user_data)
crm_notice("Fencer successfully connected");
}
- return TRUE;
-}
-
-/*!
- \internal
- \brief Schedule fencer connection attempt in main loop
-*/
-void
-controld_trigger_fencer_connect(void)
-{
- if (stonith_reconnect == NULL) {
- stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW,
- te_connect_stonith,
- GINT_TO_POINTER(TRUE));
- }
- controld_set_fsa_input_flags(R_ST_REQUIRED);
- mainloop_set_trigger(stonith_reconnect);
+ return G_SOURCE_REMOVE;
}
void
@@ -745,9 +744,9 @@ controld_disconnect_fencer(bool destroy)
stonith_api->cmds->free(stonith_api);
stonith_api = NULL;
}
- if (stonith_reconnect) {
- mainloop_destroy_trigger(stonith_reconnect);
- stonith_reconnect = NULL;
+ if (controld_fencer_connect_timer) {
+ mainloop_timer_del(controld_fencer_connect_timer);
+ controld_fencer_connect_timer = NULL;
}
if (te_client_id) {
free(te_client_id);
@@ -981,7 +980,7 @@ controld_execute_fence_action(pcmk__graph_t *graph,
priority_delay ? priority_delay : "");
/* Passing NULL means block until we can connect... */
- te_connect_stonith(NULL);
+ controld_timer_fencer_connect(NULL);
pcmk__scan_min_int(priority_delay, &delay_i, 0);
rc = fence_with_delay(target, type, delay_i);
diff --git a/daemons/controld/controld_fencing.h b/daemons/controld/controld_fencing.h
index 86a5050..76779c6 100644
--- a/daemons/controld/controld_fencing.h
+++ b/daemons/controld/controld_fencing.h
@@ -19,7 +19,7 @@ void controld_configure_fencing(GHashTable *options);
void st_fail_count_reset(const char * target);
// stonith API client
-void controld_trigger_fencer_connect(void);
+gboolean controld_timer_fencer_connect(gpointer user_data);
void controld_disconnect_fencer(bool destroy);
int controld_execute_fence_action(pcmk__graph_t *graph,
pcmk__graph_action_t *action);
--
2.27.0

View File

@ -17,7 +17,7 @@
## can be incremented to build packages reliably considered "newer"
## than previously built packages with the same pcmkversion)
%global pcmkversion 2.1.6
%global specversion 5
%global specversion 6
## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build
%global commit 6fdc9deea294bbad629b003c6ae036aaed8e3ee0
@ -155,7 +155,7 @@ Patch0: 0001-Fix-glib-assertions.patch
Patch1: 0001-Add-the-parameter-of-dampening-and-fix-attrd_updater.patch
Patch2: 0001-Add-the-parameter-of-dampening-and-fix-attrd_updater-HealthIOWait.patch
Patch3: Fix-libcrmcommon-wait-for-reply-from-appropriate-con.patch
Patch4: Refactor-fencer-sleep-1s-between-reconnects.patch
# upstream commits
Requires: resource-agents
@ -775,6 +775,9 @@ exit 0
%license %{nagios_name}-%{nagios_hash}/COPYING
%changelog
* Thu Aug 31 2023 bizhiyuan <bizhiyuan@kylinos.cn> - 2.1.6-6
- Refactor: fencer: sleep 1s between reconnects
* Fri Aug 25 2023 zhanghan <zhanghan@kylinos.cn> - 2.1.6-5
- Fix: libcrmcommon: wait for reply from appropriate controller commands