186 lines
6.7 KiB
Diff
186 lines
6.7 KiB
Diff
|
|
From 55b5b78c69089fd418c590eb265eef2f7b82d689 Mon Sep 17 00:00:00 2001
|
||
|
|
From: bizhiyuan <bizhiyuan@kylinos.cn>
|
||
|
|
Date: Thu, 31 Aug 2023 00:50:44 +0800
|
||
|
|
Subject: [PATCH] Refactor: fencer: sleep 1s between reconnects
|
||
|
|
|
||
|
|
---
|
||
|
|
daemons/controld/controld_control.c | 5 ++-
|
||
|
|
daemons/controld/controld_fencing.c | 65 ++++++++++++++---------------
|
||
|
|
daemons/controld/controld_fencing.h | 2 +-
|
||
|
|
3 files changed, 36 insertions(+), 36 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
|
||
|
|
index ffc62a0..48efdd5 100644
|
||
|
|
--- a/daemons/controld/controld_control.c
|
||
|
|
+++ b/daemons/controld/controld_control.c
|
||
|
|
@@ -504,8 +504,9 @@ do_started(long long action,
|
||
|
|
} else {
|
||
|
|
crm_notice("Pacemaker controller successfully started and accepting connections");
|
||
|
|
}
|
||
|
|
- controld_trigger_fencer_connect();
|
||
|
|
-
|
||
|
|
+ controld_set_fsa_input_flags(R_ST_REQUIRED);
|
||
|
|
+ controld_timer_fencer_connect(GINT_TO_POINTER(TRUE));
|
||
|
|
+
|
||
|
|
controld_clear_fsa_input_flags(R_STARTING);
|
||
|
|
register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
|
||
|
|
}
|
||
|
|
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
|
||
|
|
index 89cb61f..8f571b0 100644
|
||
|
|
--- a/daemons/controld/controld_fencing.c
|
||
|
|
+++ b/daemons/controld/controld_fencing.c
|
||
|
|
@@ -391,7 +391,7 @@ execute_stonith_cleanup(void)
|
||
|
|
*/
|
||
|
|
|
||
|
|
static stonith_t *stonith_api = NULL;
|
||
|
|
-static crm_trigger_t *stonith_reconnect = NULL;
|
||
|
|
+static mainloop_timer_t *controld_fencer_connect_timer = NULL;
|
||
|
|
static char *te_client_id = NULL;
|
||
|
|
|
||
|
|
static gboolean
|
||
|
|
@@ -448,8 +448,9 @@ tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
|
||
|
|
|
||
|
|
if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) {
|
||
|
|
crm_crit("Fencing daemon connection failed");
|
||
|
|
- mainloop_set_trigger(stonith_reconnect);
|
||
|
|
-
|
||
|
|
+ if (!mainloop_timer_running(controld_fencer_connect_timer)) {
|
||
|
|
+ mainloop_timer_start(controld_fencer_connect_timer);
|
||
|
|
+ }
|
||
|
|
} else {
|
||
|
|
crm_info("Fencing daemon disconnected");
|
||
|
|
}
|
||
|
|
@@ -647,14 +648,14 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event)
|
||
|
|
/*!
|
||
|
|
* \brief Connect to fencer
|
||
|
|
*
|
||
|
|
- * \param[in] user_data If NULL, retry failures now, otherwise retry in main loop
|
||
|
|
+ * \param[in] user_data If NULL, retry failures now, otherwise retry later from a main loop timer
|
||
|
|
*
|
||
|
|
- * \return TRUE
|
||
|
|
+ * \return G_SOURCE_CONTINUE if a non-blocking connection attempt failed, otherwise G_SOURCE_REMOVE
|
||
|
|
* \note If user_data is NULL, this will wait 2s between attempts, for up to
|
||
|
|
* 30 attempts, meaning the controller could be blocked as long as 58s.
|
||
|
|
*/
|
||
|
|
-static gboolean
|
||
|
|
-te_connect_stonith(gpointer user_data)
|
||
|
|
+gboolean
|
||
|
|
+controld_timer_fencer_connect(gpointer user_data)
|
||
|
|
{
|
||
|
|
int rc = pcmk_ok;
|
||
|
|
|
||
|
|
@@ -662,13 +663,13 @@ te_connect_stonith(gpointer user_data)
|
||
|
|
stonith_api = stonith_api_new();
|
||
|
|
if (stonith_api == NULL) {
|
||
|
|
crm_err("Could not connect to fencer: API memory allocation failed");
|
||
|
|
- return TRUE;
|
||
|
|
+ return G_SOURCE_REMOVE;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
if (stonith_api->state != stonith_disconnected) {
|
||
|
|
crm_trace("Already connected to fencer, no need to retry");
|
||
|
|
- return TRUE;
|
||
|
|
+ return G_SOURCE_REMOVE;
|
||
|
|
}
|
||
|
|
|
||
|
|
if (user_data == NULL) {
|
||
|
|
@@ -681,17 +682,31 @@ te_connect_stonith(gpointer user_data)
|
||
|
|
} else {
|
||
|
|
// Non-blocking (retry failures later in main loop)
|
||
|
|
rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
|
||
|
|
+
|
||
|
|
+
|
||
|
|
+ if (controld_fencer_connect_timer == NULL) {
|
||
|
|
+ controld_fencer_connect_timer =
|
||
|
|
+ mainloop_timer_add("controld_fencer_connect", 1000,
|
||
|
|
+ TRUE, controld_timer_fencer_connect,
|
||
|
|
+ GINT_TO_POINTER(TRUE));
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
if (rc != pcmk_ok) {
|
||
|
|
if (pcmk_is_set(controld_globals.fsa_input_register,
|
||
|
|
R_ST_REQUIRED)) {
|
||
|
|
crm_notice("Fencer connection failed (will retry): %s "
|
||
|
|
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
|
||
|
|
- mainloop_set_trigger(stonith_reconnect);
|
||
|
|
- } else {
|
||
|
|
+
|
||
|
|
+ if (!mainloop_timer_running(controld_fencer_connect_timer)) {
|
||
|
|
+ mainloop_timer_start(controld_fencer_connect_timer);
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ return G_SOURCE_CONTINUE;
|
||
|
|
+ } else {
|
||
|
|
crm_info("Fencer connection failed (ignoring because no longer required): %s "
|
||
|
|
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
|
||
|
|
}
|
||
|
|
- return TRUE;
|
||
|
|
+ return G_SOURCE_CONTINUE;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
@@ -709,23 +724,7 @@ te_connect_stonith(gpointer user_data)
|
||
|
|
crm_notice("Fencer successfully connected");
|
||
|
|
}
|
||
|
|
|
||
|
|
- return TRUE;
|
||
|
|
-}
|
||
|
|
-
|
||
|
|
-/*!
|
||
|
|
- \internal
|
||
|
|
- \brief Schedule fencer connection attempt in main loop
|
||
|
|
-*/
|
||
|
|
-void
|
||
|
|
-controld_trigger_fencer_connect(void)
|
||
|
|
-{
|
||
|
|
- if (stonith_reconnect == NULL) {
|
||
|
|
- stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW,
|
||
|
|
- te_connect_stonith,
|
||
|
|
- GINT_TO_POINTER(TRUE));
|
||
|
|
- }
|
||
|
|
- controld_set_fsa_input_flags(R_ST_REQUIRED);
|
||
|
|
- mainloop_set_trigger(stonith_reconnect);
|
||
|
|
+ return G_SOURCE_REMOVE;
|
||
|
|
}
|
||
|
|
|
||
|
|
void
|
||
|
|
@@ -745,9 +744,9 @@ controld_disconnect_fencer(bool destroy)
|
||
|
|
stonith_api->cmds->free(stonith_api);
|
||
|
|
stonith_api = NULL;
|
||
|
|
}
|
||
|
|
- if (stonith_reconnect) {
|
||
|
|
- mainloop_destroy_trigger(stonith_reconnect);
|
||
|
|
- stonith_reconnect = NULL;
|
||
|
|
+ if (controld_fencer_connect_timer) {
|
||
|
|
+ mainloop_timer_del(controld_fencer_connect_timer);
|
||
|
|
+ controld_fencer_connect_timer = NULL;
|
||
|
|
}
|
||
|
|
if (te_client_id) {
|
||
|
|
free(te_client_id);
|
||
|
|
@@ -981,7 +980,7 @@ controld_execute_fence_action(pcmk__graph_t *graph,
|
||
|
|
priority_delay ? priority_delay : "");
|
||
|
|
|
||
|
|
/* Passing NULL means block until we can connect... */
|
||
|
|
- te_connect_stonith(NULL);
|
||
|
|
+ controld_timer_fencer_connect(NULL);
|
||
|
|
|
||
|
|
pcmk__scan_min_int(priority_delay, &delay_i, 0);
|
||
|
|
rc = fence_with_delay(target, type, delay_i);
|
||
|
|
diff --git a/daemons/controld/controld_fencing.h b/daemons/controld/controld_fencing.h
|
||
|
|
index 86a5050..76779c6 100644
|
||
|
|
--- a/daemons/controld/controld_fencing.h
|
||
|
|
+++ b/daemons/controld/controld_fencing.h
|
||
|
|
@@ -19,7 +19,7 @@ void controld_configure_fencing(GHashTable *options);
|
||
|
|
void st_fail_count_reset(const char * target);
|
||
|
|
|
||
|
|
// stonith API client
|
||
|
|
-void controld_trigger_fencer_connect(void);
|
||
|
|
+gboolean controld_timer_fencer_connect(gpointer user_data);
|
||
|
|
void controld_disconnect_fencer(bool destroy);
|
||
|
|
int controld_execute_fence_action(pcmk__graph_t *graph,
|
||
|
|
pcmk__graph_action_t *action);
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|