commit
dba66656cd
@ -1,450 +0,0 @@
|
||||
From fa6a9ff44433c064188f57aec90a64a1b627006a Mon Sep 17 00:00:00 2001
|
||||
From: Xavi Hernandez <xhernandez@redhat.com>
|
||||
Date: Fri, 7 Feb 2020 10:19:57 +0100
|
||||
Subject: [PATCH] core: fix memory pool management races
|
||||
|
||||
Objects allocated from a per-thread memory pool keep a reference to it
|
||||
to be able to return the object to the pool when not used anymore. The
|
||||
object holding this reference can have a long life cycle that could
|
||||
survive a glfs_fini() call.
|
||||
|
||||
This means that it's unsafe to destroy memory pools from glfs_fini().
|
||||
|
||||
Another side effect of destroying memory pools from glfs_fini() is that
|
||||
the TLS variable that points to one of those pools cannot be reset for
|
||||
all alive threads. This means that any attempt to allocate memory from
|
||||
those threads will access already free'd memory, which is very
|
||||
dangerous.
|
||||
|
||||
To fix these issues, mem_pools_fini() doesn't destroy pool lists
|
||||
anymore. Only at process termination the pools are destroyed.
|
||||
|
||||
Change-Id: Ib189a5510ab6bdac78983c6c65a022e9634b0965
|
||||
Fixes: bz#1801684
|
||||
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
|
||||
|
||||
diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c
|
||||
index 63b6358..ae06f8b 100644
|
||||
--- a/libglusterfs/src/globals.c
|
||||
+++ b/libglusterfs/src/globals.c
|
||||
@@ -314,7 +314,18 @@ glusterfs_cleanup(void *ptr)
|
||||
GF_FREE(thread_syncopctx.groups);
|
||||
}
|
||||
|
||||
- mem_pool_thread_destructor();
|
||||
+ mem_pool_thread_destructor(NULL);
|
||||
+}
|
||||
+
|
||||
+void
|
||||
+gf_thread_needs_cleanup(void)
|
||||
+{
|
||||
+ /* The value stored in free_key TLS is not really used for anything, but
|
||||
+ * pthread implementation doesn't call the TLS destruction function unless
|
||||
+ * it's != NULL. This function must be called whenever something is
|
||||
+ * allocated for this thread so that glusterfs_cleanup() will be called
|
||||
+ * and resources can be released. */
|
||||
+ (void)pthread_setspecific(free_key, (void *)1);
|
||||
}
|
||||
|
||||
static void
|
||||
diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
|
||||
index e19a7cf..82cd662 100644
|
||||
--- a/libglusterfs/src/glusterfs/globals.h
|
||||
+++ b/libglusterfs/src/glusterfs/globals.h
|
||||
@@ -162,6 +162,9 @@ glusterfs_leaseid_exist(void);
|
||||
int
|
||||
glusterfs_globals_init(glusterfs_ctx_t *ctx);
|
||||
|
||||
+void
|
||||
+gf_thread_needs_cleanup(void);
|
||||
+
|
||||
struct tvec_base *
|
||||
glusterfs_ctx_tw_get(glusterfs_ctx_t *ctx);
|
||||
void
|
||||
diff --git a/libglusterfs/src/glusterfs/mem-pool.h b/libglusterfs/src/glusterfs/mem-pool.h
|
||||
index e044175..90fb882 100644
|
||||
--- a/libglusterfs/src/glusterfs/mem-pool.h
|
||||
+++ b/libglusterfs/src/glusterfs/mem-pool.h
|
||||
@@ -244,24 +244,26 @@ typedef struct per_thread_pool {
|
||||
} per_thread_pool_t;
|
||||
|
||||
typedef struct per_thread_pool_list {
|
||||
- /*
|
||||
- * These first two members are protected by the global pool lock. When
|
||||
- * a thread first tries to use any pool, we create one of these. We
|
||||
- * link it into the global list using thr_list so the pool-sweeper
|
||||
- * thread can find it, and use pthread_setspecific so this thread can
|
||||
- * find it. When the per-thread destructor runs, we "poison" the pool
|
||||
- * list to prevent further allocations. This also signals to the
|
||||
- * pool-sweeper thread that the list should be detached and freed after
|
||||
- * the next time it's swept.
|
||||
- */
|
||||
+ /* thr_list is used to place the TLS pool_list into the active global list
|
||||
+ * (pool_threads) or the inactive global list (pool_free_threads). It's
|
||||
+ * protected by the global pool_lock. */
|
||||
struct list_head thr_list;
|
||||
- unsigned int poison;
|
||||
+
|
||||
+ /* This lock is used to update poison and the hot/cold lists of members
|
||||
+ * of 'pools' array. */
|
||||
+ pthread_spinlock_t lock;
|
||||
+
|
||||
+ /* This field is used to mark a pool_list as not being owned by any thread.
|
||||
+ * This means that the sweeper thread won't be cleaning objects stored in
|
||||
+ * its pools. mem_put() uses it to decide if the object being released is
|
||||
+ * placed into its original pool_list or directly destroyed. */
|
||||
+ bool poison;
|
||||
+
|
||||
/*
|
||||
* There's really more than one pool, but the actual number is hidden
|
||||
* in the implementation code so we just make it a single-element array
|
||||
* here.
|
||||
*/
|
||||
- pthread_spinlock_t lock;
|
||||
per_thread_pool_t pools[1];
|
||||
} per_thread_pool_list_t;
|
||||
|
||||
@@ -306,7 +308,7 @@ void
|
||||
mem_pool_destroy(struct mem_pool *pool);
|
||||
|
||||
void
|
||||
-mem_pool_thread_destructor(void);
|
||||
+mem_pool_thread_destructor(per_thread_pool_list_t *pool_list);
|
||||
|
||||
void
|
||||
gf_mem_acct_enable_set(void *ctx);
|
||||
diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
|
||||
index d0f8a64..e08e56c 100644
|
||||
--- a/libglusterfs/src/mem-pool.c
|
||||
+++ b/libglusterfs/src/mem-pool.c
|
||||
@@ -367,7 +367,6 @@ static __thread per_thread_pool_list_t *thread_pool_list = NULL;
|
||||
#define POOL_SWEEP_SECS 30
|
||||
|
||||
typedef struct {
|
||||
- struct list_head death_row;
|
||||
pooled_obj_hdr_t *cold_lists[N_COLD_LISTS];
|
||||
unsigned int n_cold_lists;
|
||||
} sweep_state_t;
|
||||
@@ -384,36 +383,33 @@ static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||
static unsigned int init_count = 0;
|
||||
static pthread_t sweeper_tid;
|
||||
|
||||
-gf_boolean_t
|
||||
+static bool
|
||||
collect_garbage(sweep_state_t *state, per_thread_pool_list_t *pool_list)
|
||||
{
|
||||
unsigned int i;
|
||||
per_thread_pool_t *pt_pool;
|
||||
- gf_boolean_t poisoned;
|
||||
|
||||
(void)pthread_spin_lock(&pool_list->lock);
|
||||
|
||||
- poisoned = pool_list->poison != 0;
|
||||
- if (!poisoned) {
|
||||
- for (i = 0; i < NPOOLS; ++i) {
|
||||
- pt_pool = &pool_list->pools[i];
|
||||
- if (pt_pool->cold_list) {
|
||||
- if (state->n_cold_lists >= N_COLD_LISTS) {
|
||||
- break;
|
||||
- }
|
||||
- state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list;
|
||||
+ for (i = 0; i < NPOOLS; ++i) {
|
||||
+ pt_pool = &pool_list->pools[i];
|
||||
+ if (pt_pool->cold_list) {
|
||||
+ if (state->n_cold_lists >= N_COLD_LISTS) {
|
||||
+ (void)pthread_spin_unlock(&pool_list->lock);
|
||||
+ return true;
|
||||
}
|
||||
- pt_pool->cold_list = pt_pool->hot_list;
|
||||
- pt_pool->hot_list = NULL;
|
||||
+ state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list;
|
||||
}
|
||||
+ pt_pool->cold_list = pt_pool->hot_list;
|
||||
+ pt_pool->hot_list = NULL;
|
||||
}
|
||||
|
||||
(void)pthread_spin_unlock(&pool_list->lock);
|
||||
|
||||
- return poisoned;
|
||||
+ return false;
|
||||
}
|
||||
|
||||
-void
|
||||
+static void
|
||||
free_obj_list(pooled_obj_hdr_t *victim)
|
||||
{
|
||||
pooled_obj_hdr_t *next;
|
||||
@@ -425,82 +421,96 @@ free_obj_list(pooled_obj_hdr_t *victim)
|
||||
}
|
||||
}
|
||||
|
||||
-void *
|
||||
+static void *
|
||||
pool_sweeper(void *arg)
|
||||
{
|
||||
sweep_state_t state;
|
||||
per_thread_pool_list_t *pool_list;
|
||||
- per_thread_pool_list_t *next_pl;
|
||||
- per_thread_pool_t *pt_pool;
|
||||
- unsigned int i;
|
||||
- gf_boolean_t poisoned;
|
||||
+ uint32_t i;
|
||||
+ bool pending;
|
||||
|
||||
/*
|
||||
* This is all a bit inelegant, but the point is to avoid doing
|
||||
* expensive things (like freeing thousands of objects) while holding a
|
||||
- * global lock. Thus, we split each iteration into three passes, with
|
||||
+ * global lock. Thus, we split each iteration into two passes, with
|
||||
* only the first and fastest holding the lock.
|
||||
*/
|
||||
|
||||
+ pending = true;
|
||||
+
|
||||
for (;;) {
|
||||
- sleep(POOL_SWEEP_SECS);
|
||||
+ /* If we know there's pending work to do (or it's the first run), we
|
||||
+ * do collect garbage more often. */
|
||||
+ sleep(pending ? POOL_SWEEP_SECS / 5 : POOL_SWEEP_SECS);
|
||||
+
|
||||
(void)pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
|
||||
- INIT_LIST_HEAD(&state.death_row);
|
||||
state.n_cold_lists = 0;
|
||||
+ pending = false;
|
||||
|
||||
/* First pass: collect stuff that needs our attention. */
|
||||
(void)pthread_mutex_lock(&pool_lock);
|
||||
- list_for_each_entry_safe(pool_list, next_pl, &pool_threads, thr_list)
|
||||
+ list_for_each_entry(pool_list, &pool_threads, thr_list)
|
||||
{
|
||||
- (void)pthread_mutex_unlock(&pool_lock);
|
||||
- poisoned = collect_garbage(&state, pool_list);
|
||||
- (void)pthread_mutex_lock(&pool_lock);
|
||||
-
|
||||
- if (poisoned) {
|
||||
- list_move(&pool_list->thr_list, &state.death_row);
|
||||
+ if (collect_garbage(&state, pool_list)) {
|
||||
+ pending = true;
|
||||
}
|
||||
}
|
||||
(void)pthread_mutex_unlock(&pool_lock);
|
||||
|
||||
- /* Second pass: free dead pools. */
|
||||
- (void)pthread_mutex_lock(&pool_free_lock);
|
||||
- list_for_each_entry_safe(pool_list, next_pl, &state.death_row, thr_list)
|
||||
- {
|
||||
- for (i = 0; i < NPOOLS; ++i) {
|
||||
- pt_pool = &pool_list->pools[i];
|
||||
- free_obj_list(pt_pool->cold_list);
|
||||
- free_obj_list(pt_pool->hot_list);
|
||||
- pt_pool->hot_list = pt_pool->cold_list = NULL;
|
||||
- }
|
||||
- list_del(&pool_list->thr_list);
|
||||
- list_add(&pool_list->thr_list, &pool_free_threads);
|
||||
- }
|
||||
- (void)pthread_mutex_unlock(&pool_free_lock);
|
||||
-
|
||||
- /* Third pass: free cold objects from live pools. */
|
||||
+ /* Second pass: free cold objects from live pools. */
|
||||
for (i = 0; i < state.n_cold_lists; ++i) {
|
||||
free_obj_list(state.cold_lists[i]);
|
||||
}
|
||||
(void)pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
|
||||
}
|
||||
+
|
||||
+ return NULL;
|
||||
}
|
||||
|
||||
void
|
||||
-mem_pool_thread_destructor(void)
|
||||
+mem_pool_thread_destructor(per_thread_pool_list_t *pool_list)
|
||||
{
|
||||
- per_thread_pool_list_t *pool_list = thread_pool_list;
|
||||
-
|
||||
- /* The pool-sweeper thread will take it from here.
|
||||
- *
|
||||
- * We can change 'poison' here without taking locks because the change
|
||||
- * itself doesn't interact with other parts of the code and a simple write
|
||||
- * is already atomic from the point of view of the processor.
|
||||
- *
|
||||
- * This change can modify what mem_put() does, but both possibilities are
|
||||
- * fine until the sweeper thread kicks in. The real synchronization must be
|
||||
- * between mem_put() and the sweeper thread. */
|
||||
+ per_thread_pool_t *pt_pool;
|
||||
+ uint32_t i;
|
||||
+
|
||||
+ if (pool_list == NULL) {
|
||||
+ pool_list = thread_pool_list;
|
||||
+ }
|
||||
+
|
||||
+ /* The current thread is terminating. None of the allocated objects will
|
||||
+ * be used again. We can directly destroy them here instead of delaying
|
||||
+ * it until the next sweeper loop. */
|
||||
if (pool_list != NULL) {
|
||||
- pool_list->poison = 1;
|
||||
+ /* Remove pool_list from the global list to avoid that sweeper
|
||||
+ * could touch it. */
|
||||
+ pthread_mutex_lock(&pool_lock);
|
||||
+ list_del(&pool_list->thr_list);
|
||||
+ pthread_mutex_unlock(&pool_lock);
|
||||
+
|
||||
+ /* We need to protect hot/cold changes from potential mem_put() calls
|
||||
+ * that reference this pool_list. Once poison is set to true, we are
|
||||
+ * sure that no one else will touch hot/cold lists. The only possible
|
||||
+ * race is when at the same moment a mem_put() is adding a new item
|
||||
+ * to the hot list. We protect from that by taking pool_list->lock.
|
||||
+ * After that we don't need the lock to destroy the hot/cold lists. */
|
||||
+ pthread_spin_lock(&pool_list->lock);
|
||||
+ pool_list->poison = true;
|
||||
+ pthread_spin_unlock(&pool_list->lock);
|
||||
+
|
||||
+ for (i = 0; i < NPOOLS; i++) {
|
||||
+ pt_pool = &pool_list->pools[i];
|
||||
+
|
||||
+ free_obj_list(pt_pool->hot_list);
|
||||
+ pt_pool->hot_list = NULL;
|
||||
+
|
||||
+ free_obj_list(pt_pool->cold_list);
|
||||
+ pt_pool->cold_list = NULL;
|
||||
+ }
|
||||
+
|
||||
+ pthread_mutex_lock(&pool_free_lock);
|
||||
+ list_add(&pool_list->thr_list, &pool_free_threads);
|
||||
+ pthread_mutex_unlock(&pool_free_lock);
|
||||
+
|
||||
thread_pool_list = NULL;
|
||||
}
|
||||
}
|
||||
@@ -528,6 +538,30 @@ mem_pools_preinit(void)
|
||||
init_done = GF_MEMPOOL_INIT_EARLY;
|
||||
}
|
||||
|
||||
+static __attribute__((destructor)) void
|
||||
+mem_pools_postfini(void)
|
||||
+{
|
||||
+ per_thread_pool_list_t *pool_list, *next;
|
||||
+
|
||||
+ /* This is part of a process shutdown (or dlclose()) which means that
|
||||
+ * most probably all threads should be stopped. However this is not the
|
||||
+ * case for gluster and there are even legitimate situations in which we
|
||||
+ * could have some threads alive. What is sure is that none of those
|
||||
+ * threads should be using anything from this library, so destroying
|
||||
+ * everything here should be fine and safe. */
|
||||
+
|
||||
+ list_for_each_entry_safe(pool_list, next, &pool_threads, thr_list)
|
||||
+ {
|
||||
+ mem_pool_thread_destructor(pool_list);
|
||||
+ }
|
||||
+
|
||||
+ list_for_each_entry_safe(pool_list, next, &pool_free_threads, thr_list)
|
||||
+ {
|
||||
+ list_del(&pool_list->thr_list);
|
||||
+ FREE(pool_list);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/* Call mem_pools_init() once threading has been configured completely. This
|
||||
* prevent the pool_sweeper thread from getting killed once the main() thread
|
||||
* exits during deamonizing. */
|
||||
@@ -560,10 +594,6 @@ mem_pools_fini(void)
|
||||
*/
|
||||
break;
|
||||
case 1: {
|
||||
- per_thread_pool_list_t *pool_list;
|
||||
- per_thread_pool_list_t *next_pl;
|
||||
- unsigned int i;
|
||||
-
|
||||
/* if mem_pools_init() was not called, sweeper_tid will be invalid
|
||||
* and the functions will error out. That is not critical. In all
|
||||
* other cases, the sweeper_tid will be valid and the thread gets
|
||||
@@ -571,32 +601,11 @@ mem_pools_fini(void)
|
||||
(void)pthread_cancel(sweeper_tid);
|
||||
(void)pthread_join(sweeper_tid, NULL);
|
||||
|
||||
- /* At this point all threads should have already terminated, so
|
||||
- * it should be safe to destroy all pending per_thread_pool_list_t
|
||||
- * structures that are stored for each thread. */
|
||||
- mem_pool_thread_destructor();
|
||||
-
|
||||
- /* free all objects from all pools */
|
||||
- list_for_each_entry_safe(pool_list, next_pl, &pool_threads,
|
||||
- thr_list)
|
||||
- {
|
||||
- for (i = 0; i < NPOOLS; ++i) {
|
||||
- free_obj_list(pool_list->pools[i].hot_list);
|
||||
- free_obj_list(pool_list->pools[i].cold_list);
|
||||
- pool_list->pools[i].hot_list = NULL;
|
||||
- pool_list->pools[i].cold_list = NULL;
|
||||
- }
|
||||
-
|
||||
- list_del(&pool_list->thr_list);
|
||||
- FREE(pool_list);
|
||||
- }
|
||||
-
|
||||
- list_for_each_entry_safe(pool_list, next_pl, &pool_free_threads,
|
||||
- thr_list)
|
||||
- {
|
||||
- list_del(&pool_list->thr_list);
|
||||
- FREE(pool_list);
|
||||
- }
|
||||
+ /* There could be threads still running in some cases, so we can't
|
||||
+ * destroy pool_lists in use. We can also not destroy unused
|
||||
+ * pool_lists because some allocated objects may still be pointing
|
||||
+ * to them. */
|
||||
+ mem_pool_thread_destructor(NULL);
|
||||
|
||||
init_done = GF_MEMPOOL_INIT_DESTROY;
|
||||
/* Fall through. */
|
||||
@@ -617,7 +626,7 @@ mem_pools_fini(void)
|
||||
{
|
||||
}
|
||||
void
|
||||
-mem_pool_thread_destructor(void)
|
||||
+mem_pool_thread_destructor(per_thread_pool_list_t *pool_list)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -738,13 +747,21 @@ mem_get_pool_list(void)
|
||||
}
|
||||
}
|
||||
|
||||
+ /* There's no need to take pool_list->lock, because this is already an
|
||||
+ * atomic operation and we don't need to synchronize it with any change
|
||||
+ * in hot/cold lists. */
|
||||
+ pool_list->poison = false;
|
||||
+
|
||||
(void)pthread_mutex_lock(&pool_lock);
|
||||
- pool_list->poison = 0;
|
||||
list_add(&pool_list->thr_list, &pool_threads);
|
||||
(void)pthread_mutex_unlock(&pool_lock);
|
||||
|
||||
thread_pool_list = pool_list;
|
||||
|
||||
+ /* Ensure that all memory objects associated to the new pool_list are
|
||||
+ * destroyed when the thread terminates. */
|
||||
+ gf_thread_needs_cleanup();
|
||||
+
|
||||
return pool_list;
|
||||
}
|
||||
|
||||
diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c
|
||||
index 2eb7b49..9d987f8 100644
|
||||
--- a/libglusterfs/src/syncop.c
|
||||
+++ b/libglusterfs/src/syncop.c
|
||||
@@ -97,6 +97,13 @@ syncopctx_setfsgroups(int count, const void *groups)
|
||||
|
||||
/* set/reset the ngrps, this is where reset of groups is handled */
|
||||
opctx->ngrps = count;
|
||||
+
|
||||
+ if ((opctx->valid & SYNCOPCTX_GROUPS) == 0) {
|
||||
+ /* This is the first time we are storing groups into the TLS structu
|
||||
+ * so we mark the current thread so that it will be properly cleaned
|
||||
+ * up when the thread terminates. */
|
||||
+ gf_thread_needs_cleanup();
|
||||
+ }
|
||||
opctx->valid |= SYNCOPCTX_GROUPS;
|
||||
|
||||
out:
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
Binary file not shown.
BIN
glusterfs-7.6.tar.gz
Normal file
BIN
glusterfs-7.6.tar.gz
Normal file
Binary file not shown.
@ -2,16 +2,14 @@
|
||||
%{!?_pkgdocdir: %global _pkgdocdir %{_docdir}/%{name}-%{version}}
|
||||
|
||||
Name: glusterfs
|
||||
Version: 7.0
|
||||
Release: 4
|
||||
Version: 7.6
|
||||
Release: 1
|
||||
License: GPLv2 and LGPLv3+
|
||||
Summary: Aggregating distributed file system
|
||||
URL: http://docs.gluster.org/
|
||||
Source0: https://download.gluster.org/pub/gluster/glusterfs/7/7.0/glusterfs-7.0.tar.gz
|
||||
Source0: https://download.gluster.org/pub/gluster/glusterfs/7/7.6/glusterfs-7.6.tar.gz
|
||||
Source7: glusterfsd.service
|
||||
|
||||
Patch0: 0000-core-fix-memory-pool-management-races.patch
|
||||
|
||||
BuildRequires: systemd bison flex gcc make libtool ncurses-devel readline-devel libattr-devel
|
||||
BuildRequires: libxml2-devel openssl-devel libaio-devel libacl-devel python3-devel git perl
|
||||
BuildRequires: libtirpc-devel rpcgen libuuid-devel sqlite-devel lvm2-devel firewalld
|
||||
@ -459,6 +457,9 @@ exit 0
|
||||
%{_mandir}/man8/*gluster*.8*
|
||||
|
||||
%changelog
|
||||
* Thu Jul 16 2020 yanglongkang <yanglongkang@huawei.com> - 7.6-1
|
||||
- update package to 7.6
|
||||
|
||||
* Mon Feb 24 2020 renxudong<renxudong1@huawei.com> - 7.0-4
|
||||
- Type:bugfix
|
||||
- ID:NA
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user