From 55b5b78c69089fd418c590eb265eef2f7b82d689 Mon Sep 17 00:00:00 2001 From: bizhiyuan Date: Thu, 31 Aug 2023 00:50:44 +0800 Subject: [PATCH] Refactor: fencer: sleep 1s between reconnects --- daemons/controld/controld_control.c | 5 ++- daemons/controld/controld_fencing.c | 65 ++++++++++++++--------------- daemons/controld/controld_fencing.h | 2 +- 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c index ffc62a0..48efdd5 100644 --- a/daemons/controld/controld_control.c +++ b/daemons/controld/controld_control.c @@ -504,8 +504,9 @@ do_started(long long action, } else { crm_notice("Pacemaker controller successfully started and accepting connections"); } - controld_trigger_fencer_connect(); - + controld_set_fsa_input_flags(R_ST_REQUIRED); + controld_timer_fencer_connect(GINT_TO_POINTER(TRUE)); + controld_clear_fsa_input_flags(R_STARTING); register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); } diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c index 89cb61f..8f571b0 100644 --- a/daemons/controld/controld_fencing.c +++ b/daemons/controld/controld_fencing.c @@ -391,7 +391,7 @@ execute_stonith_cleanup(void) */ static stonith_t *stonith_api = NULL; -static crm_trigger_t *stonith_reconnect = NULL; +static mainloop_timer_t *controld_fencer_connect_timer = NULL; static char *te_client_id = NULL; static gboolean @@ -448,8 +448,9 @@ tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e) if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) { crm_crit("Fencing daemon connection failed"); - mainloop_set_trigger(stonith_reconnect); - + if (!mainloop_timer_running(controld_fencer_connect_timer)) { + mainloop_timer_start(controld_fencer_connect_timer); + } } else { crm_info("Fencing daemon disconnected"); } @@ -647,14 +648,14 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event) /*! * \brief Connect to fencer * - * \param[in] user_data If NULL, retry failures now, otherwise retry in main loop + * \param[in] user_data If NULL, retry failures now, otherwise retry in mainloop timer * - * \return TRUE + * \return G_SOURCE_REMOVE on success, G_SOURCE_CONTINUE to retry * \note If user_data is NULL, this will wait 2s between attempts, for up to * 30 attempts, meaning the controller could be blocked as long as 58s. */ -static gboolean -te_connect_stonith(gpointer user_data) +gboolean +controld_timer_fencer_connect(gpointer user_data) { int rc = pcmk_ok; @@ -662,13 +663,13 @@ te_connect_stonith(gpointer user_data) stonith_api = stonith_api_new(); if (stonith_api == NULL) { crm_err("Could not connect to fencer: API memory allocation failed"); - return TRUE; + return G_SOURCE_REMOVE; } } if (stonith_api->state != stonith_disconnected) { crm_trace("Already connected to fencer, no need to retry"); - return TRUE; + return G_SOURCE_REMOVE; } if (user_data == NULL) { @@ -681,17 +682,31 @@ te_connect_stonith(gpointer user_data) } else { // Non-blocking (retry failures later in main loop) rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL); + + + if (controld_fencer_connect_timer == NULL) { + controld_fencer_connect_timer = + mainloop_timer_add("controld_fencer_connect", 1000, + TRUE, controld_timer_fencer_connect, + GINT_TO_POINTER(TRUE)); + } + if (rc != pcmk_ok) { if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) { crm_notice("Fencer connection failed (will retry): %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); - mainloop_set_trigger(stonith_reconnect); - } else { + + if (!mainloop_timer_running(controld_fencer_connect_timer)) { + mainloop_timer_start(controld_fencer_connect_timer); + } + + return G_SOURCE_CONTINUE; + } else { crm_info("Fencer connection failed (ignoring because no longer required): %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); } - return TRUE; + return G_SOURCE_CONTINUE; } } @@ -709,23 +724,7 @@ te_connect_stonith(gpointer user_data) crm_notice("Fencer successfully connected"); } - return TRUE; -} - -/*! - \internal - \brief Schedule fencer connection attempt in main loop -*/ -void -controld_trigger_fencer_connect(void) -{ - if (stonith_reconnect == NULL) { - stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, - te_connect_stonith, - GINT_TO_POINTER(TRUE)); - } - controld_set_fsa_input_flags(R_ST_REQUIRED); - mainloop_set_trigger(stonith_reconnect); + return G_SOURCE_REMOVE; } void @@ -745,9 +744,9 @@ controld_disconnect_fencer(bool destroy) stonith_api->cmds->free(stonith_api); stonith_api = NULL; } - if (stonith_reconnect) { - mainloop_destroy_trigger(stonith_reconnect); - stonith_reconnect = NULL; + if (controld_fencer_connect_timer) { + mainloop_timer_del(controld_fencer_connect_timer); + controld_fencer_connect_timer = NULL; } if (te_client_id) { free(te_client_id); @@ -981,7 +980,7 @@ controld_execute_fence_action(pcmk__graph_t *graph, priority_delay ? priority_delay : ""); /* Passing NULL means block until we can connect... */ - te_connect_stonith(NULL); + controld_timer_fencer_connect(NULL); pcmk__scan_min_int(priority_delay, &delay_i, 0); rc = fence_with_delay(target, type, delay_i); diff --git a/daemons/controld/controld_fencing.h b/daemons/controld/controld_fencing.h index 86a5050..76779c6 100644 --- a/daemons/controld/controld_fencing.h +++ b/daemons/controld/controld_fencing.h @@ -19,7 +19,7 @@ void controld_configure_fencing(GHashTable *options); void st_fail_count_reset(const char * target); // stonith API client -void controld_trigger_fencer_connect(void); +gboolean controld_timer_fencer_connect(gpointer user_data); void controld_disconnect_fencer(bool destroy); int controld_execute_fence_action(pcmk__graph_t *graph, pcmk__graph_action_t *action); -- 2.27.0