119 lines
2.8 KiB
Diff
119 lines
2.8 KiB
Diff
From 3cfa90b54367168cc1e473af004675f12816e955 Mon Sep 17 00:00:00 2001
|
|
From: donghaobo <donghaobo@kuaishou.com>
|
|
Date: Fri, 17 Nov 2023 13:55:22 +0800
|
|
Subject: [PATCH] strace: fix potential deadlock during cleanup
|
|
|
|
strace -f can potentially deadlock during cleanup if the tracee
|
|
is using vfork or CLONE_VFORK to spawn threads.
|
|
|
|
On Linux, calling vfork puts the calling thread into 'D' state
|
|
until the child process calls execve or exit. Therefore, strace
|
|
should detach the child process first; otherwise it can wait
|
|
indefinitely for the calling thread in 'D' state.
|
|
|
|
Reproducer:
|
|
|
|
/*
|
|
* Start tracing with strace -f,
|
|
* then press Ctrl-C within 9 seconds to interrupt.
|
|
*/
|
|
#include <stdlib.h>
|
|
#include <unistd.h>
|
|
#include <sys/wait.h>
|
|
int main(void)
|
|
{
|
|
pid_t pid = vfork();
|
|
if (pid < 0)
|
|
return 1;
|
|
if (pid) {
|
|
int status;
|
|
waitpid(pid, &status, 0);
|
|
return 0;
|
|
}
|
|
sleep(9);
|
|
_exit(0);
|
|
}
|
|
|
|
* src/strace.c (cleanup): Do not call detach() for each tracee
|
|
one by one as it can deadlock, instead call interrupt_or_stop()
|
|
for each tracee and after that enter a wait loop calling
|
|
detach_interrupted_or_stopped() for each tracee as soon as
|
|
it becomes ready.
|
|
* NEWS: Mention this fix.
|
|
|
|
Co-authored-by: Dmitry V. Levin <ldv@strace.io>
|
|
|
|
Reference: https://github.com/strace/strace/commit/3cfa90b54367168cc1e473af004675f12816e955
|
|
Conflict: Remove NEWS
|
|
---
|
|
NEWS | 2 ++
|
|
src/strace.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
|
|
2 files changed, 46 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/src/strace.c b/src/strace.c
|
|
index ef3360b95..04b3c1dfe 100644
|
|
--- a/src/strace.c
|
|
+++ b/src/strace.c
|
|
@@ -3168,6 +3168,8 @@ cleanup(int fatal_sig)
|
|
if (!fatal_sig)
|
|
fatal_sig = SIGTERM;
|
|
|
|
+ size_t num_to_wait = 0;
|
|
+
|
|
for (size_t i = 0; i < tcbtabsize; ++i) {
|
|
struct tcb *tcp = tcbtab[i];
|
|
if (!tcp->pid)
|
|
@@ -3177,7 +3179,48 @@ cleanup(int fatal_sig)
|
|
kill(tcp->pid, SIGCONT);
|
|
kill(tcp->pid, fatal_sig);
|
|
}
|
|
- detach(tcp);
|
|
+ if (interrupt_or_stop(tcp))
|
|
+ ++num_to_wait;
|
|
+ else
|
|
+ droptcb_verbose(tcp);
|
|
+ }
|
|
+
|
|
+ while (num_to_wait) {
|
|
+ int status;
|
|
+ pid_t pid = waitpid(-1, &status, __WALL);
|
|
+
|
|
+ if (pid < 0) {
|
|
+ if (errno == EINTR)
|
|
+ continue;
|
|
+ /* ECHILD is not expected */
|
|
+ perror_func_msg("waitpid(-1, __WALL)");
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ if (pid == popen_pid) {
|
|
+ if (!WIFSTOPPED(status))
|
|
+ popen_pid = 0;
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ if (debug_flag)
|
|
+ print_debug_info(pid, status);
|
|
+
|
|
+ struct tcb *tcp = pid2tcb(pid);
|
|
+ if (!tcp) {
|
|
+ if (!is_number_in_set(QUIET_EXIT, quiet_set)) {
|
|
+ /*
|
|
+ * This can happen if we inherited an unknown child.
|
|
+ */
|
|
+ error_msg("Exit of unknown pid %u ignored", pid);
|
|
+ }
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ if (detach_interrupted_or_stopped(tcp, status)) {
|
|
+ droptcb_verbose(tcp);
|
|
+ --num_to_wait;
|
|
+ }
|
|
}
|
|
}
|
|
|
|
--
|
|
2.33.0
|
|
|