!49 [sync] PR-48: Refactor: fencer: sleep 1s between reconnects
From: @openeuler-sync-bot Reviewed-by: @jxy_git Signed-off-by: @jxy_git
This commit is contained in:
commit
98a5b23c19
185
Refactor-fencer-sleep-1s-between-reconnects.patch
Normal file
185
Refactor-fencer-sleep-1s-between-reconnects.patch
Normal file
@@ -0,0 +1,185 @@
|
||||
From 55b5b78c69089fd418c590eb265eef2f7b82d689 Mon Sep 17 00:00:00 2001
|
||||
From: bizhiyuan <bizhiyuan@kylinos.cn>
|
||||
Date: Thu, 31 Aug 2023 00:50:44 +0800
|
||||
Subject: [PATCH] Refactor: fencer: sleep 1s between reconnects
|
||||
|
||||
---
|
||||
daemons/controld/controld_control.c | 5 ++-
|
||||
daemons/controld/controld_fencing.c | 65 ++++++++++++++---------------
|
||||
daemons/controld/controld_fencing.h | 2 +-
|
||||
3 files changed, 36 insertions(+), 36 deletions(-)
|
||||
|
||||
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
|
||||
index ffc62a0..48efdd5 100644
|
||||
--- a/daemons/controld/controld_control.c
|
||||
+++ b/daemons/controld/controld_control.c
|
||||
@@ -504,8 +504,9 @@ do_started(long long action,
|
||||
} else {
|
||||
crm_notice("Pacemaker controller successfully started and accepting connections");
|
||||
}
|
||||
- controld_trigger_fencer_connect();
|
||||
-
|
||||
+ controld_set_fsa_input_flags(R_ST_REQUIRED);
|
||||
+ controld_timer_fencer_connect(GINT_TO_POINTER(TRUE));
|
||||
+
|
||||
controld_clear_fsa_input_flags(R_STARTING);
|
||||
register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
|
||||
}
|
||||
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
|
||||
index 89cb61f..8f571b0 100644
|
||||
--- a/daemons/controld/controld_fencing.c
|
||||
+++ b/daemons/controld/controld_fencing.c
|
||||
@@ -391,7 +391,7 @@ execute_stonith_cleanup(void)
|
||||
*/
|
||||
|
||||
static stonith_t *stonith_api = NULL;
|
||||
-static crm_trigger_t *stonith_reconnect = NULL;
|
||||
+static mainloop_timer_t *controld_fencer_connect_timer = NULL;
|
||||
static char *te_client_id = NULL;
|
||||
|
||||
static gboolean
|
||||
@@ -448,8 +448,9 @@ tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
|
||||
|
||||
if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) {
|
||||
crm_crit("Fencing daemon connection failed");
|
||||
- mainloop_set_trigger(stonith_reconnect);
|
||||
-
|
||||
+ if (!mainloop_timer_running(controld_fencer_connect_timer)) {
|
||||
+ mainloop_timer_start(controld_fencer_connect_timer);
|
||||
+ }
|
||||
} else {
|
||||
crm_info("Fencing daemon disconnected");
|
||||
}
|
||||
@@ -647,14 +648,14 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event)
|
||||
/*!
|
||||
* \brief Connect to fencer
|
||||
*
|
||||
- * \param[in] user_data If NULL, retry failures now, otherwise retry in main loop
|
||||
+ * \param[in] user_data If NULL, retry failures now, otherwise retry in mainloop timer
|
||||
*
|
||||
- * \return TRUE
|
||||
+ * \return G_SOURCE_REMOVE on success, G_SOURCE_CONTINUE to retry
|
||||
* \note If user_data is NULL, this will wait 2s between attempts, for up to
|
||||
* 30 attempts, meaning the controller could be blocked as long as 58s.
|
||||
*/
|
||||
-static gboolean
|
||||
-te_connect_stonith(gpointer user_data)
|
||||
+gboolean
|
||||
+controld_timer_fencer_connect(gpointer user_data)
|
||||
{
|
||||
int rc = pcmk_ok;
|
||||
|
||||
@@ -662,13 +663,13 @@ te_connect_stonith(gpointer user_data)
|
||||
stonith_api = stonith_api_new();
|
||||
if (stonith_api == NULL) {
|
||||
crm_err("Could not connect to fencer: API memory allocation failed");
|
||||
- return TRUE;
|
||||
+ return G_SOURCE_REMOVE;
|
||||
}
|
||||
}
|
||||
|
||||
if (stonith_api->state != stonith_disconnected) {
|
||||
crm_trace("Already connected to fencer, no need to retry");
|
||||
- return TRUE;
|
||||
+ return G_SOURCE_REMOVE;
|
||||
}
|
||||
|
||||
if (user_data == NULL) {
|
||||
@@ -681,17 +682,31 @@ te_connect_stonith(gpointer user_data)
|
||||
} else {
|
||||
// Non-blocking (retry failures later in main loop)
|
||||
rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
|
||||
+
|
||||
+
|
||||
+ if (controld_fencer_connect_timer == NULL) {
|
||||
+ controld_fencer_connect_timer =
|
||||
+ mainloop_timer_add("controld_fencer_connect", 1000,
|
||||
+ TRUE, controld_timer_fencer_connect,
|
||||
+ GINT_TO_POINTER(TRUE));
|
||||
+ }
|
||||
+
|
||||
if (rc != pcmk_ok) {
|
||||
if (pcmk_is_set(controld_globals.fsa_input_register,
|
||||
R_ST_REQUIRED)) {
|
||||
crm_notice("Fencer connection failed (will retry): %s "
|
||||
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
|
||||
- mainloop_set_trigger(stonith_reconnect);
|
||||
- } else {
|
||||
+
|
||||
+ if (!mainloop_timer_running(controld_fencer_connect_timer)) {
|
||||
+ mainloop_timer_start(controld_fencer_connect_timer);
|
||||
+ }
|
||||
+
|
||||
+ return G_SOURCE_CONTINUE;
|
||||
+ } else {
|
||||
crm_info("Fencer connection failed (ignoring because no longer required): %s "
|
||||
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
|
||||
}
|
||||
- return TRUE;
|
||||
+ return G_SOURCE_CONTINUE;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -709,23 +724,7 @@ te_connect_stonith(gpointer user_data)
|
||||
crm_notice("Fencer successfully connected");
|
||||
}
|
||||
|
||||
- return TRUE;
|
||||
-}
|
||||
-
|
||||
-/*!
|
||||
- \internal
|
||||
- \brief Schedule fencer connection attempt in main loop
|
||||
-*/
|
||||
-void
|
||||
-controld_trigger_fencer_connect(void)
|
||||
-{
|
||||
- if (stonith_reconnect == NULL) {
|
||||
- stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW,
|
||||
- te_connect_stonith,
|
||||
- GINT_TO_POINTER(TRUE));
|
||||
- }
|
||||
- controld_set_fsa_input_flags(R_ST_REQUIRED);
|
||||
- mainloop_set_trigger(stonith_reconnect);
|
||||
+ return G_SOURCE_REMOVE;
|
||||
}
|
||||
|
||||
void
|
||||
@@ -745,9 +744,9 @@ controld_disconnect_fencer(bool destroy)
|
||||
stonith_api->cmds->free(stonith_api);
|
||||
stonith_api = NULL;
|
||||
}
|
||||
- if (stonith_reconnect) {
|
||||
- mainloop_destroy_trigger(stonith_reconnect);
|
||||
- stonith_reconnect = NULL;
|
||||
+ if (controld_fencer_connect_timer) {
|
||||
+ mainloop_timer_del(controld_fencer_connect_timer);
|
||||
+ controld_fencer_connect_timer = NULL;
|
||||
}
|
||||
if (te_client_id) {
|
||||
free(te_client_id);
|
||||
@@ -981,7 +980,7 @@ controld_execute_fence_action(pcmk__graph_t *graph,
|
||||
priority_delay ? priority_delay : "");
|
||||
|
||||
/* Passing NULL means block until we can connect... */
|
||||
- te_connect_stonith(NULL);
|
||||
+ controld_timer_fencer_connect(NULL);
|
||||
|
||||
pcmk__scan_min_int(priority_delay, &delay_i, 0);
|
||||
rc = fence_with_delay(target, type, delay_i);
|
||||
diff --git a/daemons/controld/controld_fencing.h b/daemons/controld/controld_fencing.h
|
||||
index 86a5050..76779c6 100644
|
||||
--- a/daemons/controld/controld_fencing.h
|
||||
+++ b/daemons/controld/controld_fencing.h
|
||||
@@ -19,7 +19,7 @@ void controld_configure_fencing(GHashTable *options);
|
||||
void st_fail_count_reset(const char * target);
|
||||
|
||||
// stonith API client
|
||||
-void controld_trigger_fencer_connect(void);
|
||||
+gboolean controld_timer_fencer_connect(gpointer user_data);
|
||||
void controld_disconnect_fencer(bool destroy);
|
||||
int controld_execute_fence_action(pcmk__graph_t *graph,
|
||||
pcmk__graph_action_t *action);
|
||||
--
|
||||
2.27.0
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
## can be incremented to build packages reliably considered "newer"
|
||||
## than previously built packages with the same pcmkversion)
|
||||
%global pcmkversion 2.1.6
|
||||
-%global specversion 5
|
||||
+%global specversion 6
|
||||
|
||||
## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build
|
||||
%global commit 6fdc9deea294bbad629b003c6ae036aaed8e3ee0
|
||||
@@ -155,7 +155,7 @@ Patch0: 0001-Fix-glib-assertions.patch
|
||||
Patch1: 0001-Add-the-parameter-of-dampening-and-fix-attrd_updater.patch
|
||||
Patch2: 0001-Add-the-parameter-of-dampening-and-fix-attrd_updater-HealthIOWait.patch
|
||||
Patch3: Fix-libcrmcommon-wait-for-reply-from-appropriate-con.patch
|
||||
|
||||
Patch4: Refactor-fencer-sleep-1s-between-reconnects.patch
|
||||
# upstream commits
|
||||
|
||||
Requires: resource-agents
|
||||
@@ -775,6 +775,9 @@ exit 0
|
||||
%license %{nagios_name}-%{nagios_hash}/COPYING
|
||||
|
||||
%changelog
|
||||
* Thu Aug 31 2023 bizhiyuan <bizhiyuan@kylinos.cn> - 2.1.6-6
|
||||
- Refactor: fencer: sleep 1s between reconnects
|
||||
|
||||
* Fri Aug 25 2023 zhanghan <zhanghan@kylinos.cn> - 2.1.6-5
|
||||
- Fix: libcrmcommon: wait for reply from appropriate controller commands
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user