pacemaker/Refactor-fencer-sleep-1s-between-reconnects.patch
bizhiyuan 7291a5c0ff Refactor: fencer: sleep 1s between reconnects
(cherry picked from commit 0ef3fd0936d149b148509e74ce081aae34235579)
2023-08-31 16:43:28 +08:00

186 lines
6.7 KiB
Diff

From 55b5b78c69089fd418c590eb265eef2f7b82d689 Mon Sep 17 00:00:00 2001
From: bizhiyuan <bizhiyuan@kylinos.cn>
Date: Thu, 31 Aug 2023 00:50:44 +0800
Subject: [PATCH] Refactor: fencer: sleep 1s between reconnects
---
daemons/controld/controld_control.c | 5 ++-
daemons/controld/controld_fencing.c | 65 ++++++++++++++---------------
daemons/controld/controld_fencing.h | 2 +-
3 files changed, 36 insertions(+), 36 deletions(-)
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
index ffc62a0..48efdd5 100644
--- a/daemons/controld/controld_control.c
+++ b/daemons/controld/controld_control.c
@@ -504,8 +504,9 @@ do_started(long long action,
} else {
crm_notice("Pacemaker controller successfully started and accepting connections");
}
- controld_trigger_fencer_connect();
-
+ controld_set_fsa_input_flags(R_ST_REQUIRED);
+ controld_timer_fencer_connect(GINT_TO_POINTER(TRUE));
+
controld_clear_fsa_input_flags(R_STARTING);
register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
}
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index 89cb61f..8f571b0 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -391,7 +391,7 @@ execute_stonith_cleanup(void)
*/
static stonith_t *stonith_api = NULL;
-static crm_trigger_t *stonith_reconnect = NULL;
+static mainloop_timer_t *controld_fencer_connect_timer = NULL;
static char *te_client_id = NULL;
static gboolean
@@ -448,8 +448,9 @@ tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) {
crm_crit("Fencing daemon connection failed");
- mainloop_set_trigger(stonith_reconnect);
-
+ if (!mainloop_timer_running(controld_fencer_connect_timer)) {
+ mainloop_timer_start(controld_fencer_connect_timer);
+ }
} else {
crm_info("Fencing daemon disconnected");
}
@@ -647,14 +648,14 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event)
/*!
* \brief Connect to fencer
*
- * \param[in] user_data If NULL, retry failures now, otherwise retry in main loop
+ * \param[in] user_data If NULL, retry failures now, otherwise retry in mainloop timer
*
- * \return TRUE
+ * \return G_SOURCE_REMOVE on success, G_SOURCE_CONTINUE to retry
* \note If user_data is NULL, this will wait 2s between attempts, for up to
* 30 attempts, meaning the controller could be blocked as long as 58s.
*/
-static gboolean
-te_connect_stonith(gpointer user_data)
+gboolean
+controld_timer_fencer_connect(gpointer user_data)
{
int rc = pcmk_ok;
@@ -662,13 +663,13 @@ te_connect_stonith(gpointer user_data)
stonith_api = stonith_api_new();
if (stonith_api == NULL) {
crm_err("Could not connect to fencer: API memory allocation failed");
- return TRUE;
+ return G_SOURCE_REMOVE;
}
}
if (stonith_api->state != stonith_disconnected) {
crm_trace("Already connected to fencer, no need to retry");
- return TRUE;
+ return G_SOURCE_REMOVE;
}
if (user_data == NULL) {
@@ -681,17 +682,31 @@ te_connect_stonith(gpointer user_data)
} else {
// Non-blocking (retry failures later in main loop)
rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
+
+
+ if (controld_fencer_connect_timer == NULL) {
+ controld_fencer_connect_timer =
+ mainloop_timer_add("controld_fencer_connect", 1000,
+ TRUE, controld_timer_fencer_connect,
+ GINT_TO_POINTER(TRUE));
+ }
+
if (rc != pcmk_ok) {
if (pcmk_is_set(controld_globals.fsa_input_register,
R_ST_REQUIRED)) {
crm_notice("Fencer connection failed (will retry): %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
- mainloop_set_trigger(stonith_reconnect);
- } else {
+
+ if (!mainloop_timer_running(controld_fencer_connect_timer)) {
+ mainloop_timer_start(controld_fencer_connect_timer);
+ }
+
+ return G_SOURCE_CONTINUE;
+ } else {
crm_info("Fencer connection failed (ignoring because no longer required): %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
}
- return TRUE;
+ return G_SOURCE_CONTINUE;
}
}
@@ -709,23 +724,7 @@ te_connect_stonith(gpointer user_data)
crm_notice("Fencer successfully connected");
}
- return TRUE;
-}
-
-/*!
- \internal
- \brief Schedule fencer connection attempt in main loop
-*/
-void
-controld_trigger_fencer_connect(void)
-{
- if (stonith_reconnect == NULL) {
- stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW,
- te_connect_stonith,
- GINT_TO_POINTER(TRUE));
- }
- controld_set_fsa_input_flags(R_ST_REQUIRED);
- mainloop_set_trigger(stonith_reconnect);
+ return G_SOURCE_REMOVE;
}
void
@@ -745,9 +744,9 @@ controld_disconnect_fencer(bool destroy)
stonith_api->cmds->free(stonith_api);
stonith_api = NULL;
}
- if (stonith_reconnect) {
- mainloop_destroy_trigger(stonith_reconnect);
- stonith_reconnect = NULL;
+ if (controld_fencer_connect_timer) {
+ mainloop_timer_del(controld_fencer_connect_timer);
+ controld_fencer_connect_timer = NULL;
}
if (te_client_id) {
free(te_client_id);
@@ -981,7 +980,7 @@ controld_execute_fence_action(pcmk__graph_t *graph,
priority_delay ? priority_delay : "");
/* Passing NULL means block until we can connect... */
- te_connect_stonith(NULL);
+ controld_timer_fencer_connect(NULL);
pcmk__scan_min_int(priority_delay, &delay_i, 0);
rc = fence_with_delay(target, type, delay_i);
diff --git a/daemons/controld/controld_fencing.h b/daemons/controld/controld_fencing.h
index 86a5050..76779c6 100644
--- a/daemons/controld/controld_fencing.h
+++ b/daemons/controld/controld_fencing.h
@@ -19,7 +19,7 @@ void controld_configure_fencing(GHashTable *options);
void st_fail_count_reset(const char * target);
// stonith API client
-void controld_trigger_fencer_connect(void);
+gboolean controld_timer_fencer_connect(gpointer user_data);
void controld_disconnect_fencer(bool destroy);
int controld_execute_fence_action(pcmk__graph_t *graph,
pcmk__graph_action_t *action);
--
2.27.0