87 lines
2.7 KiB
Diff
87 lines
2.7 KiB
Diff
From d03594b0313db71413b9dcb040f8d5c4da7213b1 Mon Sep 17 00:00:00 2001
|
|
From: David Sterba <dsterba@suse.com>
|
|
Date: Thu, 18 Apr 2024 18:24:48 +0800
|
|
Subject: [PATCH] fix exclusive op enqueue timeout
|
|
There's a report that 'btrfs balance start --enqueue' does not properly
|
|
wait when there are multiple instances started. The command does a busy
|
|
wait instead of blocking until the timeout expires.
|
|
|
|
Strace output:
|
|
|
|
0.000006 pselect6(5, NULL, NULL, [4], {tv_sec=60, tv_nsec=0}, NULL) = 1 (except [4], left {tv_sec=59, tv_nsec=999999716})
|
|
0.000008 pselect6(5, NULL, NULL, [4], {tv_sec=29, tv_nsec=999999000}, NULL) = 1 (except [4], left {tv_sec=29, tv_nsec=999998786})
|
|
|
|
After the first select almost the entire timeout is left, and the second
|
|
one starts right after it.
|
|
|
|
Polling/selecting sysfs files is possible under some conditions:
|
|
|
|
- the file descriptor must be reopened before each poll/select
|
|
- the whole buffer must be read before each poll/select too
|
|
|
|
With that in place it now works as expected. The remaining timeout logic
|
|
is slightly adjusted to wait at most 10 seconds so the pending jobs do
|
|
not wait too long if there's still a lot of time left from the first
|
|
select.
|
|
|
|
Issue: #746
|
|
Signed-off-by: David Sterba <dsterba@suse.com>
|
|
---
|
|
common/utils.c | 21 ++++++++++++++++++++-
|
|
1 file changed, 20 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/common/utils.c b/common/utils.c
|
|
index 62f0e3f..7cde492 100644
|
|
--- a/common/utils.c
|
|
+++ b/common/utils.c
|
|
@@ -1326,26 +1326,45 @@ int check_running_fs_exclop(int fd, enum exclusive_operation start, bool enqueue
|
|
fflush(stdout);
|
|
}
|
|
|
|
+ /*
|
|
+ * The sysfs file descriptor needs to be reopened and all data read
|
|
+ * before each select().
|
|
+ */
|
|
while (exclop > 0) {
|
|
fd_set fds;
|
|
struct timeval tv = { .tv_sec = 60, .tv_usec = 0 };
|
|
+ char tmp[1024];
|
|
|
|
+ close(sysfs_fd);
|
|
+ sysfs_fd = sysfs_open_fsid_file(fd, "exclusive_operation");
|
|
+ if (sysfs_fd < 0)
|
|
+ return sysfs_fd;
|
|
FD_ZERO(&fds);
|
|
FD_SET(sysfs_fd, &fds);
|
|
|
|
+ ret = read(sysfs_fd, tmp, sizeof(tmp));
|
|
ret = select(sysfs_fd + 1, NULL, NULL, &fds, &tv);
|
|
if (ret < 0) {
|
|
ret = -errno;
|
|
break;
|
|
}
|
|
if (ret > 0) {
|
|
+ close(sysfs_fd);
|
|
+ sysfs_fd = sysfs_open_fsid_file(fd, "exclusive_operation");
|
|
+ if (sysfs_fd < 0)
|
|
+ return sysfs_fd;
|
|
+
|
|
+ FD_ZERO(&fds);
|
|
+ FD_SET(sysfs_fd, &fds);
|
|
+
|
|
+ ret = read(sysfs_fd, tmp, sizeof(tmp));
|
|
/*
|
|
* Notified before the timeout, check again before
|
|
* returning. In case there are more operations
|
|
* waiting, we want to reduce the chances to race so
|
|
* reuse the remaining time to randomize the order.
|
|
*/
|
|
- tv.tv_sec /= 2;
|
|
+ tv.tv_sec = (tv.tv_sec % 10) + 1;
|
|
ret = select(sysfs_fd + 1, NULL, NULL, &fds, &tv);
|
|
exclop = get_fs_exclop(fd);
|
|
if (exclop <= 0)
|
|
--
|
|
2.43.0
|
|
|