176 lines
7.0 KiB
Diff
176 lines
7.0 KiB
Diff
From 5d6bf1efb225b964bfff398277e68345acdac1d0 Mon Sep 17 00:00:00 2001
|
|
From: David Teigland <teigland@redhat.com>
|
|
Date: Wed, 25 Sep 2019 14:23:14 -0500
|
|
Subject: [PATCH 126/180] lvmetad: fix sync cache to lvmetad
|
|
|
|
The error can be reproduced by following these steps:
|
|
#!/bin/bash
|
|
vgcreate vgtest /dev/sdb
|
|
lvcreate -L 100M -n lv1 vgtest
|
|
while :
|
|
do
|
|
service lvm2-lvmetad restart
|
|
vgs &
|
|
pvscan &
|
|
lvcreate -L 100M -n lv2 vgtest &
|
|
lvchange /dev/vgtest/lv1 --addtag xxxxx &
|
|
wait
|
|
if ! lvs|grep lv2;then
|
|
echo "err create"
|
|
break
|
|
fi
|
|
sleep 1
|
|
lvremove -y /dev/vgtest/lv2
|
|
lvchange /dev/vgtest/lv1 --deltag xxxxx
|
|
done
|
|
|
|
The script then reports a failure to create vgtest/lv2. Actually lv2 was
|
|
created, but the metadata written to disk was overwritten by lvchange.
|
|
lv2 can be found by calling dmsetup table, while lvs cannot see it.
|
|
|
|
This is because, when lvmetad is restarted, several lvm commands update the
|
|
token concurrently. When lvcreate receives "token_mismatch", it stops
|
|
communicating with lvmetad, so the lvmetad cache is no longer in
|
|
sync with the metadata on disk, and lv2 is not committed to the lvmetad
|
|
cache. The metadata of vgtest that lvchange queries from lvmetad is
|
|
out of date. lvchange then overwrites the new metadata with the old one.
|
|
|
|
This patch makes lvm processes update the token synchronously: only one
|
|
command updates the lvmetad token at a time.
|
|
|
|
lvmetad_pvscan_single sends the metadata on a pv by sending "pv_found"
|
|
to lvmetad, but the metadata may be out of date after waiting for the
|
|
chance to update the lvmetad token. Call label_read to read the metadata again.
|
|
|
|
A token mismatch may lead to problems, so increase the log level of the related messages.
|
|
|
|
Signed-off-by: wangjufeng<wangjufeng@huawei.com>
|
|
---
|
|
daemons/lvmetad/lvmetad-core.c | 19 +++++++++++++++----
|
|
lib/cache/lvmetad.c | 31 +++++++++++++++++++++++++++++--
|
|
2 files changed, 44 insertions(+), 6 deletions(-)
|
|
|
|
diff --git a/daemons/lvmetad/lvmetad-core.c b/daemons/lvmetad/lvmetad-core.c
|
|
index 72473d7..c274880 100644
|
|
--- a/daemons/lvmetad/lvmetad-core.c
|
|
+++ b/daemons/lvmetad/lvmetad-core.c
|
|
@@ -2669,6 +2669,7 @@ static response handler(daemon_state s, client_handle h, request r)
|
|
int pid;
|
|
int cache_lock = 0;
|
|
int info_lock = 0;
|
|
+ uint64_t timegap = 0;
|
|
|
|
rq = daemon_request_str(r, "request", "NONE");
|
|
token = daemon_request_str(r, "token", "NONE");
|
|
@@ -2711,9 +2712,19 @@ static response handler(daemon_state s, client_handle h, request r)
|
|
state->update_cmd);
|
|
|
|
} else if (prev_in_progress && this_in_progress) {
|
|
+ timegap = _monotonic_seconds() - state->update_begin;
|
|
+ if (timegap < state->update_timeout) {
|
|
+ pthread_mutex_unlock(&state->token_lock);
|
|
+ return daemon_reply_simple("token_updating",
|
|
+ "expected = %s", state->token,
|
|
+ "update_pid = " FMTd64, (int64_t)state->update_pid,
|
|
+ "reason = %s", "another command has populated the cache",
|
|
+ NULL);
|
|
+ }
|
|
+
|
|
/* Current update is cancelled and replaced by a new update */
|
|
|
|
- DEBUGLOG(state, "token_update replacing pid %d begin %llu len %d cmd %s",
|
|
+ WARN(state, "token_update replacing pid %d begin %llu len %d cmd %s",
|
|
state->update_pid,
|
|
(unsigned long long)state->update_begin,
|
|
(int)(_monotonic_seconds() - state->update_begin),
|
|
@@ -2726,7 +2737,7 @@ static response handler(daemon_state s, client_handle h, request r)
|
|
state->update_pid = pid;
|
|
strncpy(state->update_cmd, cmd, CMD_NAME_SIZE - 1);
|
|
|
|
- DEBUGLOG(state, "token_update begin %llu timeout %d pid %d cmd %s",
|
|
+ WARN(state, "token_update begin %llu timeout %d pid %d cmd %s",
|
|
(unsigned long long)state->update_begin,
|
|
state->update_timeout,
|
|
state->update_pid,
|
|
@@ -2737,7 +2748,7 @@ static response handler(daemon_state s, client_handle h, request r)
|
|
|
|
if (state->update_pid != pid) {
|
|
/* If a pid doing update was cancelled, ignore its token update at the end. */
|
|
- DEBUGLOG(state, "token_update ignored from cancelled update pid %d", pid);
|
|
+ WARN(state, "token_update ignored from cancelled update pid %d", pid);
|
|
pthread_mutex_unlock(&state->token_lock);
|
|
|
|
return daemon_reply_simple("token_mismatch",
|
|
@@ -2748,7 +2759,7 @@ static response handler(daemon_state s, client_handle h, request r)
|
|
NULL);
|
|
}
|
|
|
|
- DEBUGLOG(state, "token_update end len %d pid %d new token %s",
|
|
+ WARN(state, "token_update end len %d pid %d new token %s",
|
|
(int)(_monotonic_seconds() - state->update_begin),
|
|
state->update_pid, token);
|
|
|
|
diff --git a/lib/cache/lvmetad.c b/lib/cache/lvmetad.c
|
|
index 291a2b2..8dc12a6 100644
|
|
--- a/lib/cache/lvmetad.c
|
|
+++ b/lib/cache/lvmetad.c
|
|
@@ -552,7 +552,12 @@ static int _token_update(int *replaced_update)
|
|
const char *reply_str;
|
|
int update_pid;
|
|
int ending_our_update;
|
|
+ unsigned int wait_sec = 0;
|
|
+ uint64_t now = 0, wait_start = 0;
|
|
+ wait_sec = (unsigned int)_lvmetad_update_timeout;
|
|
+ unsigned int delay_usec = 0;
|
|
|
|
+retry:
|
|
log_debug_lvmetad("Sending lvmetad token_update %s", _lvmetad_token);
|
|
reply = _lvmetad_send(NULL, "token_update", NULL);
|
|
|
|
@@ -568,6 +573,28 @@ static int _token_update(int *replaced_update)
|
|
update_pid = (int)daemon_reply_int(reply, "update_pid", 0);
|
|
reply_str = daemon_reply_str(reply, "response", "");
|
|
|
|
+ if (!strcmp(reply_str, "token_updating")) {
|
|
+ daemon_reply_destroy(reply);
|
|
+ if (!(now = _monotonic_seconds())) {
|
|
+ log_print_unless_silent("_monotonic_seconds error");
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ if (!wait_start)
|
|
+ wait_start = now;
|
|
+
|
|
+ if (now - wait_start <= wait_sec) {
|
|
+ log_warn("lvmetad is being updated, retry for %u more seconds.",
|
|
+ wait_sec - (unsigned int)(now - wait_start));
|
|
+ delay_usec = 1000000 + lvm_even_rand(&_lvmetad_cmd->rand_seed, 1000000);
|
|
+ usleep(delay_usec);
|
|
+ goto retry;
|
|
+ }
|
|
+
|
|
+ log_print_unless_silent("Not using lvmetad after %u sec lvmetad_update_wait_time, no more try.", wait_sec);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
/*
|
|
* A mismatch can only happen when this command attempts to set the
|
|
* token to filter:<hash> at the end of its update, but the update has
|
|
@@ -578,11 +605,11 @@ static int _token_update(int *replaced_update)
|
|
|
|
ending_our_update = strcmp(_lvmetad_token, LVMETAD_TOKEN_UPDATE_IN_PROGRESS);
|
|
|
|
- log_debug_lvmetad("Received token update mismatch expected \"%s\" our token \"%s\" update_pid %d our pid %d",
|
|
+ log_print_unless_silent("Received token update mismatch expected \"%s\" our token \"%s\" update_pid %d our pid %d",
|
|
token_expected, _lvmetad_token, update_pid, getpid());
|
|
|
|
if (ending_our_update && (update_pid != getpid())) {
|
|
- log_warn("WARNING: lvmetad was updated by another command (pid %d).", update_pid);
|
|
+ log_print_unless_silent("WARNING: lvmetad was updated by another command (pid %d).", update_pid);
|
|
} else {
|
|
/*
|
|
* Shouldn't happen.
|
|
--
|
|
2.19.1
|
|
|