From eab6fcf772e91a16428e0f3262e6b4ef81705f79 Mon Sep 17 00:00:00 2001
From: sunguoshuai
Date: Tue, 22 Jan 2019 21:29:56 -0500
Subject: [PATCH] multipathd: disable queueing for recreated map in
 uev_remove_map

Reason: disable queueing for the recreated map in uev_remove_map.

---
 multipathd/main.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/multipathd/main.c b/multipathd/main.c
index 8baf9ab..23fb9d4 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -674,7 +674,7 @@ uev_add_map (struct uevent * uev, struct vectors * vecs)
 	char *alias;
 	int major = -1, minor = -1, rc;
 
-	condlog(3, "%s: add map (uevent)", uev->kernel);
+	condlog(2, "%s: add map (uevent)", uev->kernel);
 	alias = uevent_get_dm_name(uev);
 	if (!alias) {
 		condlog(3, "%s: No DM_NAME in uevent", uev->kernel);
@@ -793,7 +793,26 @@ uev_remove_map (struct uevent * uev, struct vectors * vecs)
 			uev->kernel, mpp->alias, alias);
 		goto out;
 	}
-
+	/*
+	 * There may be a race window here:
+	 * 1. all paths are gone, so the map is flushed from both multipathd and the kernel
+	 * 2. paths reappear, causing multipathd to create the map again.
+	 * Step 1 generates a remove uevent which may be handled after step 2,
+	 * so temporarily disable queueing here for the map created by step 2
+	 * and let the change uevent (generated by step 2) call uev_add_map
+	 * ->setup_multipath to enable queueing again. This prevents
+	 * a deadlock in this race window.
+	 *
+	 * The possible deadlock is: all udevd workers hang on the
+	 * devices because of queue_if_no_path, so no udevd worker
+	 * can handle new events, and since multipathd will remove the map,
+	 * the checkerloop cannot check this map's retry tick timeout
+	 * and cancel the hanging I/O, which makes the udevd workers hang forever.
+	 * multipathd cannot receive any uevent from udevd because all
+	 * udevd workers hang there, so the map cannot be recreated
+	 * again, which results in a deadlock.
+	 */
+	dm_queue_if_no_path(alias, 0);
 	remove_map_and_stop_waiter(mpp, vecs);
 out:
 	lock_cleanup_pop(vecs->lock);
-- 
1.8.3.1