strace: fix potential deadlock during cleanup

(cherry picked from commit fb283e98eb9df90d0a98bf4c8053ea3ffa88c592)
This commit is contained in:
wangxiao65 2024-12-27 03:16:42 +00:00 committed by openeuler-sync-bot
parent 2548cb66e1
commit c2f4a25c7a
7 changed files with 643 additions and 1 deletions

View File

@ -0,0 +1,67 @@
From 9ccfe110343369ea8986fb8781a679373b9b8892 Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@strace.io>
Date: Thu, 23 Nov 2023 08:00:00 +0000
Subject: [PATCH] Fix the type of tcbtab iterators
* src/strace.c (alloctcb, startup_attach, init, pid2tcb, cleanup):
Change the type of tcbtab iterators from unsigned int to size_t so that
they match the type of tcbtabsize.
Reference: https://github.com/strace/strace/commit/9ccfe110343369ea8986fb8781a679373b9b8892
Conflict: NA
---
src/strace.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/strace.c b/src/strace.c
index 6f808996e..d3fa44f30 100644
--- a/src/strace.c
+++ b/src/strace.c
@@ -1033,7 +1033,7 @@ alloctcb(int pid)
if (nprocs == tcbtabsize)
expand_tcbtab();
- for (unsigned int i = 0; i < tcbtabsize; ++i) {
+ for (size_t i = 0; i < tcbtabsize; ++i) {
struct tcb *tcp = tcbtab[i];
if (!tcp->pid) {
memset(tcp, 0, sizeof(*tcp));
@@ -1447,7 +1447,7 @@ startup_attach(void)
debug_msg("new tracer pid is %d", strace_tracer_pid);
}
- for (unsigned int tcbi = 0; tcbi < tcbtabsize; ++tcbi) {
+ for (size_t tcbi = 0; tcbi < tcbtabsize; ++tcbi) {
tcp = tcbtab[tcbi];
if (!tcp->pid)
@@ -2761,7 +2761,7 @@ init(int argc, char *argv[])
* of tcbs are not filled though tcbs are initialized.
* We must fill the fields here.
*/
- for (unsigned int i = 0; i < tcbtabsize; ++i) {
+ for (size_t i = 0; i < tcbtabsize; ++i) {
struct tcb *tcp = tcbtab[i];
if (tcp->comm[0] == 0)
maybe_load_task_comm(tcp);
@@ -3090,7 +3090,7 @@ pid2tcb(const int pid)
if (tcp && tcp->pid == pid)
return tcp;
- for (unsigned int i = 0; i < tcbtabsize; ++i) {
+ for (size_t i = 0; i < tcbtabsize; ++i) {
tcp = tcbtab[i];
if (tcp->pid == pid)
return *ptcp = tcp;
@@ -3108,7 +3108,7 @@ cleanup(int fatal_sig)
if (!fatal_sig)
fatal_sig = SIGTERM;
- for (unsigned int i = 0; i < tcbtabsize; ++i) {
+ for (size_t i = 0; i < tcbtabsize; ++i) {
struct tcb *tcp = tcbtab[i];
if (!tcp->pid)
continue;
--
2.33.0

View File

@ -0,0 +1,90 @@
From 651640fd5faf0e24d9cd88c596854c108f732329 Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@strace.io>
Date: Fri, 24 Nov 2023 08:00:00 +0000
Subject: [PATCH] Move print_debug_info() definition before cleanup()
* src/strace.c (print_debug_info): Move before cleanup().
Reference: https://github.com/strace/strace/commit/651640fd5faf0e24d9cd88c596854c108f732329
Conflict: NA
---
src/strace.c | 56 ++++++++++++++++++++++++++--------------------------
1 file changed, 28 insertions(+), 28 deletions(-)
diff --git a/src/strace.c b/src/strace.c
index d3fa44f30..28f5b4368 100644
--- a/src/strace.c
+++ b/src/strace.c
@@ -3099,34 +3099,6 @@ pid2tcb(const int pid)
return NULL;
}
-static void
-cleanup(int fatal_sig)
-{
- if (ptrace_setoptions & PTRACE_O_EXITKILL)
- return;
-
- if (!fatal_sig)
- fatal_sig = SIGTERM;
-
- for (size_t i = 0; i < tcbtabsize; ++i) {
- struct tcb *tcp = tcbtab[i];
- if (!tcp->pid)
- continue;
- debug_func_msg("looking at pid %u", tcp->pid);
- if (tcp->pid == strace_child) {
- kill(tcp->pid, SIGCONT);
- kill(tcp->pid, fatal_sig);
- }
- detach(tcp);
- }
-}
-
-static void
-interrupt(int sig)
-{
- interrupted = sig;
-}
-
static void
print_debug_info(const int pid, int status)
{
@@ -3166,6 +3138,34 @@ print_debug_info(const int pid, int status)
error_msg("[wait(0x%06x) = %u] %s%s", status, pid, buf, evbuf);
}
+static void
+cleanup(int fatal_sig)
+{
+ if (ptrace_setoptions & PTRACE_O_EXITKILL)
+ return;
+
+ if (!fatal_sig)
+ fatal_sig = SIGTERM;
+
+ for (size_t i = 0; i < tcbtabsize; ++i) {
+ struct tcb *tcp = tcbtab[i];
+ if (!tcp->pid)
+ continue;
+ debug_func_msg("looking at pid %u", tcp->pid);
+ if (tcp->pid == strace_child) {
+ kill(tcp->pid, SIGCONT);
+ kill(tcp->pid, fatal_sig);
+ }
+ detach(tcp);
+ }
+}
+
+static void
+interrupt(int sig)
+{
+ interrupted = sig;
+}
+
static struct tcb *
maybe_allocate_tcb(const int pid, int status)
{
--
2.33.0

View File

@ -0,0 +1,51 @@
From 83a2af722ce2c2b38f3d6d9f4114015521449a2a Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@strace.io>
Date: Sat, 25 Nov 2023 08:00:00 +0000
Subject: [PATCH] Factor out droptcb_verbose() from detach()
* src/strace.c (droptcb_verbose): New function.
(detach): Use it instead of droptcb.
Reference: https://github.com/strace/strace/commit/83a2af722ce2c2b38f3d6d9f4114015521449a2a
Conflict: NA
---
src/strace.c | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/src/strace.c b/src/strace.c
index 28f5b4368..351842792 100644
--- a/src/strace.c
+++ b/src/strace.c
@@ -1148,6 +1148,16 @@ droptcb(struct tcb *tcp)
memset(tcp, 0, sizeof(*tcp));
}
+static void
+droptcb_verbose(struct tcb *tcp)
+{
+ if (!is_number_in_set(QUIET_ATTACH, quiet_set)
+ && (tcp->flags & TCB_ATTACHED))
+ error_msg("Process %u detached", tcp->pid);
+
+ droptcb(tcp);
+}
+
/* Detach traced process.
* Never call DETACH twice on the same process as both unattached and
* attached-unstopped processes give the same ESRCH. For unattached process we
@@ -1302,11 +1312,7 @@ detach(struct tcb *tcp)
}
drop:
- if (!is_number_in_set(QUIET_ATTACH, quiet_set)
- && (tcp->flags & TCB_ATTACHED))
- error_msg("Process %u detached", tcp->pid);
-
- droptcb(tcp);
+ droptcb_verbose(tcp);
}
static void
--
2.33.0

View File

@ -0,0 +1,127 @@
From 4c4c339585b523167e336d9b379fe4c00dc94dca Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@strace.io>
Date: Sun, 26 Nov 2023 08:00:00 +0000
Subject: [PATCH] Factor out interrupt_or_stop() from detach()
* src/strace.c (interrupt_or_stop): New function.
(detach): Use it.
Reference: https://github.com/strace/strace/commit/4c4c339585b523167e336d9b379fe4c00dc94dca
Conflict: NA
---
src/strace.c | 49 ++++++++++++++++++++++++++++---------------------
1 file changed, 28 insertions(+), 21 deletions(-)
diff --git a/src/strace.c b/src/strace.c
index 351842792..385cbaa72 100644
--- a/src/strace.c
+++ b/src/strace.c
@@ -1158,17 +1158,10 @@ droptcb_verbose(struct tcb *tcp)
droptcb(tcp);
}
-/* Detach traced process.
- * Never call DETACH twice on the same process as both unattached and
- * attached-unstopped processes give the same ESRCH. For unattached process we
- * would SIGSTOP it and wait for its SIGSTOP notification forever.
- */
-static void
-detach(struct tcb *tcp)
+/* Returns true when the tracee has to be waited for. */
+static bool
+interrupt_or_stop(struct tcb *tcp)
{
- int error;
- int status;
-
/*
* Linux wrongly insists the child be stopped
* before detaching. Arghh. We go through hoops
@@ -1176,24 +1169,24 @@ detach(struct tcb *tcp)
*/
if (!(tcp->flags & TCB_ATTACHED))
- goto drop;
+ return false;
/* We attached but possibly didn't see the expected SIGSTOP.
* We must catch exactly one as otherwise the detached process
* would be left stopped (process state T).
*/
if (tcp->flags & TCB_IGNORE_ONE_SIGSTOP)
- goto wait_loop;
+ return true;
- error = ptrace(PTRACE_DETACH, tcp->pid, 0, 0);
+ int error = ptrace(PTRACE_DETACH, tcp->pid, 0, 0);
if (!error) {
/* On a clear day, you can see forever. */
- goto drop;
+ return false;
}
if (errno != ESRCH) {
/* Shouldn't happen. */
perror_func_msg("ptrace(PTRACE_DETACH,%u)", tcp->pid);
- goto drop;
+ return false;
}
/* ESRCH: process is either not stopped or doesn't exist. */
if (my_tkill(tcp->pid, 0) < 0) {
@@ -1201,7 +1194,7 @@ detach(struct tcb *tcp)
/* Shouldn't happen. */
perror_func_msg("tkill(%u,0)", tcp->pid);
/* else: process doesn't exist. */
- goto drop;
+ return false;
}
/* Process is not stopped, need to stop it. */
if (use_seize) {
@@ -1213,26 +1206,40 @@ detach(struct tcb *tcp)
*/
error = ptrace(PTRACE_INTERRUPT, tcp->pid, 0, 0);
if (!error)
- goto wait_loop;
+ return true;
if (errno != ESRCH)
perror_func_msg("ptrace(PTRACE_INTERRUPT,%u)", tcp->pid);
} else {
error = my_tkill(tcp->pid, SIGSTOP);
if (!error)
- goto wait_loop;
+ return true;
if (errno != ESRCH)
perror_func_msg("tkill(%u,SIGSTOP)", tcp->pid);
}
+
/* Either process doesn't exist, or some weird error. */
- goto drop;
+ return false;
+}
- wait_loop:
- /* We end up here in three cases:
+/* Detach traced process.
+ * Never call DETACH twice on the same process as both unattached and
+ * attached-unstopped processes give the same ESRCH. For unattached process we
+ * would SIGSTOP it and wait for its SIGSTOP notification forever.
+ */
+static void
+detach(struct tcb *tcp)
+{
+ if (!interrupt_or_stop(tcp))
+ goto drop;
+
+ /*
+ * We end up here in three cases:
* 1. We sent PTRACE_INTERRUPT (use_seize case)
* 2. We sent SIGSTOP (!use_seize)
* 3. Attach SIGSTOP was already pending (TCB_IGNORE_ONE_SIGSTOP set)
*/
for (;;) {
+ int status;
unsigned int sig;
if (waitpid(tcp->pid, &status, __WALL) < 0) {
if (errno == EINTR)
--
2.33.0

View File

@ -0,0 +1,179 @@
From d275fc0312255ddffa878b70da34d945b4fd8212 Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@strace.io>
Date: Mon, 27 Nov 2023 08:00:00 +0000
Subject: [PATCH] Factor out detach_interrupted_or_stopped() from detach()
* src/strace.c (detach_interrupted_or_stopped): New function.
(detach): Use it.
Reference: https://github.com/strace/strace/commit/d275fc0312255ddffa878b70da34d945b4fd8212
Conflict: NA
---
src/strace.c | 136 +++++++++++++++++++++++++++------------------------
1 file changed, 72 insertions(+), 64 deletions(-)
diff --git a/src/strace.c b/src/strace.c
index 385cbaa72..ef3360b95 100644
--- a/src/strace.c
+++ b/src/strace.c
@@ -1221,6 +1221,77 @@ interrupt_or_stop(struct tcb *tcp)
return false;
}
+/* Returns true if the tracee can be passed to droptcb. */
+static bool
+detach_interrupted_or_stopped(struct tcb *tcp, int status)
+{
+ if (!WIFSTOPPED(status)) {
+ /*
+ * Tracee exited or was killed by signal.
+ * We shouldn't normally reach this place:
+ * we don't want to consume exit status.
+ * Consider "strace -p PID" being ^C-ed:
+ * we want merely to detach from PID.
+ *
+ * However, we _can_ end up here if tracee
+ * was SIGKILLed.
+ */
+ return true;
+ }
+ unsigned int sig = WSTOPSIG(status);
+ debug_msg("detach wait: event:%d sig:%d",
+ (unsigned) status >> 16, sig);
+ if (use_seize) {
+ unsigned event = (unsigned)status >> 16;
+ if (event == PTRACE_EVENT_STOP /*&& sig == SIGTRAP*/) {
+ /*
+ * sig == SIGTRAP: PTRACE_INTERRUPT stop.
+ * sig == other: process was already stopped
+ * with this stopping sig (see tests/detach-stopped).
+ * Looks like re-injecting this sig is not necessary
+ * in DETACH for the tracee to remain stopped.
+ */
+ sig = 0;
+ }
+ /*
+ * PTRACE_INTERRUPT is not guaranteed to produce
+ * the above event if other ptrace-stop is pending.
+ * See tests/detach-sleeping testcase:
+ * strace got SIGINT while tracee is sleeping.
+ * We sent PTRACE_INTERRUPT.
+ * We see syscall exit, not PTRACE_INTERRUPT stop.
+ * We won't get PTRACE_INTERRUPT stop
+ * if we would CONT now. Need to DETACH.
+ */
+ if (sig == syscall_trap_sig)
+ sig = 0;
+ /* else: not sure in which case we can be here.
+ * Signal stop? Inject it while detaching.
+ */
+ ptrace_restart(PTRACE_DETACH, tcp, sig);
+ return true;
+ }
+ /* Note: this check has to be after use_seize check */
+ /* (else, in use_seize case SIGSTOP will be mistreated) */
+ if (sig == SIGSTOP) {
+ /* Detach, suppressing SIGSTOP */
+ ptrace_restart(PTRACE_DETACH, tcp, 0);
+ return true;
+ }
+ if (sig == syscall_trap_sig)
+ sig = 0;
+ /* Can't detach just yet, may need to wait for SIGSTOP */
+ int error = ptrace_restart(PTRACE_CONT, tcp, sig);
+ if (error < 0) {
+ /* Should not happen.
+ * Note: ptrace_restart returns 0 on ESRCH, so it's not it.
+ * ptrace_restart already emitted error message.
+ */
+ return true;
+ }
+ return false;
+}
+
/* Detach traced process.
* Never call DETACH twice on the same process as both unattached and
* attached-unstopped processes give the same ESRCH. For unattached process we
@@ -1240,7 +1311,6 @@ detach(struct tcb *tcp)
*/
for (;;) {
int status;
- unsigned int sig;
if (waitpid(tcp->pid, &status, __WALL) < 0) {
if (errno == EINTR)
continue;
@@ -1252,70 +1322,8 @@ detach(struct tcb *tcp)
perror_func_msg("waitpid(%u)", tcp->pid);
break;
}
- if (!WIFSTOPPED(status)) {
- /*
- * Tracee exited or was killed by signal.
- * We shouldn't normally reach this place:
- * we don't want to consume exit status.
- * Consider "strace -p PID" being ^C-ed:
- * we want merely to detach from PID.
- *
- * However, we _can_ end up here if tracee
- * was SIGKILLed.
- */
- break;
- }
- sig = WSTOPSIG(status);
- debug_msg("detach wait: event:%d sig:%d",
- (unsigned) status >> 16, sig);
- if (use_seize) {
- unsigned event = (unsigned)status >> 16;
- if (event == PTRACE_EVENT_STOP /*&& sig == SIGTRAP*/) {
- /*
- * sig == SIGTRAP: PTRACE_INTERRUPT stop.
- * sig == other: process was already stopped
- * with this stopping sig (see tests/detach-stopped).
- * Looks like re-injecting this sig is not necessary
- * in DETACH for the tracee to remain stopped.
- */
- sig = 0;
- }
- /*
- * PTRACE_INTERRUPT is not guaranteed to produce
- * the above event if other ptrace-stop is pending.
- * See tests/detach-sleeping testcase:
- * strace got SIGINT while tracee is sleeping.
- * We sent PTRACE_INTERRUPT.
- * We see syscall exit, not PTRACE_INTERRUPT stop.
- * We won't get PTRACE_INTERRUPT stop
- * if we would CONT now. Need to DETACH.
- */
- if (sig == syscall_trap_sig)
- sig = 0;
- /* else: not sure in which case we can be here.
- * Signal stop? Inject it while detaching.
- */
- ptrace_restart(PTRACE_DETACH, tcp, sig);
- break;
- }
- /* Note: this check has to be after use_seize check */
- /* (else, in use_seize case SIGSTOP will be mistreated) */
- if (sig == SIGSTOP) {
- /* Detach, suppressing SIGSTOP */
- ptrace_restart(PTRACE_DETACH, tcp, 0);
+ if (detach_interrupted_or_stopped(tcp, status))
break;
- }
- if (sig == syscall_trap_sig)
- sig = 0;
- /* Can't detach just yet, may need to wait for SIGSTOP */
- error = ptrace_restart(PTRACE_CONT, tcp, sig);
- if (error < 0) {
- /* Should not happen.
- * Note: ptrace_restart returns 0 on ESRCH, so it's not it.
- * ptrace_restart already emitted error message.
- */
- break;
- }
}
drop:
--
2.33.0

View File

@ -0,0 +1,118 @@
From 3cfa90b54367168cc1e473af004675f12816e955 Mon Sep 17 00:00:00 2001
From: donghaobo <donghaobo@kuaishou.com>
Date: Fri, 17 Nov 2023 13:55:22 +0800
Subject: [PATCH] strace: fix potential deadlock during cleanup
strace -f can potentially deadlock during cleanup if the tracee
is using vfork or CLONE_VFORK to spawn threads.
On linux, calling vfork will cause the calling thread to 'D' state
until the child process calls execve or exit. Therefore, strace
should detach the child process first, otherwise it can wait
indefinitely for the calling thread in 'D' state.
Reproducer:
/*
* Start tracing with strace -f,
* then press Ctrl-C within 9 seconds to interrupt.
*/
#include <stdlib.h>
#include <unistd.h>
#include <sys/wait.h>
int main(void)
{
pid_t pid = vfork();
if (pid < 0)
return 1;
if (pid) {
int status;
waitpid(pid, &status, 0);
return 0;
}
sleep(9);
_exit(0);
}
* src/strace.c (cleanup): Do not call detach() for each tracee
one by one as it can deadlock, instead call interrupt_or_stop()
for each tracee and after that enter a wait loop calling
detach_interrupted_or_stopped() for each tracee as soon as
they become ready.
* NEWS: Mention this fix.
Co-authored-by: Dmitry V. Levin <ldv@strace.io>
Reference: https://github.com/strace/strace/commit/3cfa90b54367168cc1e473af004675f12816e955
Conflict: Remove NEWS
---
NEWS | 2 ++
src/strace.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 46 insertions(+), 1 deletion(-)
diff --git a/src/strace.c b/src/strace.c
index ef3360b95..04b3c1dfe 100644
--- a/src/strace.c
+++ b/src/strace.c
@@ -3168,6 +3168,8 @@ cleanup(int fatal_sig)
if (!fatal_sig)
fatal_sig = SIGTERM;
+ size_t num_to_wait = 0;
+
for (size_t i = 0; i < tcbtabsize; ++i) {
struct tcb *tcp = tcbtab[i];
if (!tcp->pid)
@@ -3177,7 +3179,48 @@ cleanup(int fatal_sig)
kill(tcp->pid, SIGCONT);
kill(tcp->pid, fatal_sig);
}
- detach(tcp);
+ if (interrupt_or_stop(tcp))
+ ++num_to_wait;
+ else
+ droptcb_verbose(tcp);
+ }
+
+ while (num_to_wait) {
+ int status;
+ pid_t pid = waitpid(-1, &status, __WALL);
+
+ if (pid < 0) {
+ if (errno == EINTR)
+ continue;
+ /* ECHILD is not expected */
+ perror_func_msg("waitpid(-1, __WALL)");
+ break;
+ }
+
+ if (pid == popen_pid) {
+ if (!WIFSTOPPED(status))
+ popen_pid = 0;
+ continue;
+ }
+
+ if (debug_flag)
+ print_debug_info(pid, status);
+
+ struct tcb *tcp = pid2tcb(pid);
+ if (!tcp) {
+ if (!is_number_in_set(QUIET_EXIT, quiet_set)) {
+ /*
+ * This can happen if we inherited an unknown child.
+ */
+ error_msg("Exit of unknown pid %u ignored", pid);
+ }
+ continue;
+ }
+
+ if (detach_interrupted_or_stopped(tcp, status)) {
+ droptcb_verbose(tcp);
+ --num_to_wait;
+ }
}
}
--
2.33.0

View File

@ -1,7 +1,7 @@
Summary: Tracks and displays system calls associated with a running process Summary: Tracks and displays system calls associated with a running process
Name: strace Name: strace
Version: 6.6 Version: 6.6
Release: 2 Release: 3
# The test suite is GPLv2+, all the rest is LGPLv2.1+. # The test suite is GPLv2+, all the rest is LGPLv2.1+.
License: LGPL-2.1+ and GPL-2.0+ License: LGPL-2.1+ and GPL-2.0+
# Some distros require Group tag to be present, # Some distros require Group tag to be present,
@ -20,6 +20,13 @@ BuildRequires: pkgconfig(bluez)
# Install binutils-devel to enable symbol demangling. # Install binutils-devel to enable symbol demangling.
BuildRequires: elfutils-devel binutils-devel BuildRequires: elfutils-devel binutils-devel
Patch0001: backport-0001-Fix-the-type-of-tcbtab-iterators.patch
Patch0002: backport-0002-Move-print_debug_info-definition-before-cleanup.patch
Patch0003: backport-0003-Factor-out-droptcb_verbose-from-detach.patch
Patch0004: backport-0004-Factor-out-interrupt_or_stop-from-detach.patch
Patch0005: backport-0005-Factor-out-detach_interrupted_or_stopped-from-detach.patch
Patch0006: backport-0006-strace-fix-potential-deadlock-during-cleanup.patch
# OBS compatibility # OBS compatibility
%{?!buildroot:BuildRoot: %_tmppath/buildroot-%name-%version-%release} %{?!buildroot:BuildRoot: %_tmppath/buildroot-%name-%version-%release}
%define maybe_use_defattr %{?suse_version:%%defattr(-,root,root)} %define maybe_use_defattr %{?suse_version:%%defattr(-,root,root)}
@ -100,6 +107,9 @@ wait
%{_mandir}/man1/* %{_mandir}/man1/*
%changelog %changelog
* Fri Dec 27 2024 wangxiao <wangxiao184@h-partners.com> - 6.6-3
- strace: fix potential deadlock during cleanup
* Wed Feb 28 2024 liuchao <liuchao173@huawei.com> - 6.6-2 * Wed Feb 28 2024 liuchao <liuchao173@huawei.com> - 6.6-2
- remove redundant judgments - remove redundant judgments