Bugfix for lock and owner bit

Correct the returned error code, add checks on pthread spinlock and
mutex initialization, remove a repeated init of a pthread lock, and fix
the owner bit when the SQ wraps around.

Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Signed-off-by: Ran Zhou <zhouran10@h-partners.com>
(cherry picked from commit 794f3792a7267d0586bfac7d67507a27a5e61305)
Ran Zhou 2023-12-07 10:58:28 +08:00 committed by openeuler-sync-bot
parent 23d0687033
commit 626267239b
6 changed files with 438 additions and 7 deletions

0077-libhns-Fix-parent-domain-unsupported-comp-mask.patch

@@ -0,0 +1,72 @@
From f912bc59d0b56b560a0097b5f738ba160a3e23eb Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Thu, 7 Dec 2023 09:47:59 +0800
Subject: [PATCH 77/80] libhns: Fix parent domain unsupported comp mask

driver inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8MF28
Hns does not support any comp mask for the parent domain. The driver
returns EINVAL if any comp mask is set. This patch replaces the
inappropriate return value EINVAL with EOPNOTSUPP.

The error is found by testcase test_mem_align_ud_traffic.
ERROR: test_mem_align_ud_traffic (tests.test_parent_domain.ParentDomainTrafficTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "./tests/test_parent_domain.py", line 183, in test_mem_align_ud_traffic
    self.create_players(parent_domain_ud_res,
  File "./tests/test_parent_domain.py", line 156, in create_players
    self.client = resource(**self.dev_info, **resource_arg)
  File "./tests/test_parent_domain.py", line 90, in __init__
    super().__init__(**kwargs)
  File "./tests/base.py", line 617, in __init__
    super(RoCETrafficResources, self).__init__(dev_name, ib_port, gid_index, **kwargs)
  File "./tests/base.py", line 503, in __init__
    super(TrafficResources, self).__init__(dev_name=dev_name,
  File "./tests/base.py", line 477, in __init__
    self.create_pd()
  File "./tests/test_parent_domain.py", line 95, in create_pd
    create_parent_domain_with_allocators(self)
  File "./tests/test_parent_domain.py", line 69, in create_parent_domain_with_allocators
    raise ex
  File "./tests/test_parent_domain.py", line 65, in create_parent_domain_with_allocators
    res.pd = ParentDomain(res.ctx, attr=pd_attr)
  File "pd.pyx", line 261, in pyverbs.pd.ParentDomain.__init__
pyverbs.pyverbs_error.PyverbsRDMAError: Failed to allocate Parent Domain. Errno: 22, Invalid argument
Fixes: cfe548d4c78e ("libhns: Add support for the thread domain and the parent domain")
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
providers/hns/hns_roce_u_verbs.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index e597e93..fae6126 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -238,7 +238,7 @@ struct ibv_pd *hns_roce_u_alloc_pad(struct ibv_context *context,
return NULL;
if (attr->comp_mask) {
- errno = EINVAL;
+ errno = EOPNOTSUPP;
return NULL;
}
--
2.25.1
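
The distinction matters to callers: verbs consumers (including the pyverbs test above) conventionally treat EOPNOTSUPP as "feature not offered by this provider, skip", while EINVAL signals a caller bug. A minimal caller-side sketch of that check; the allocator stubs and the try_parent_domain() helper are hypothetical, for illustration only:

```c
#include <errno.h>
#include <stdio.h>
#include <infiniband/verbs.h>

/* Hypothetical allocator stub: defer to the library's default allocator. */
static void *my_alloc(struct ibv_pd *pd, void *pd_context, size_t size,
		      size_t alignment, uint64_t resource_type)
{
	return IBV_ALLOCATOR_USE_DEFAULT;
}

static void my_free(struct ibv_pd *pd, void *pd_context, void *ptr,
		    uint64_t resource_type)
{
}

static int try_parent_domain(struct ibv_context *ctx, struct ibv_pd *pd)
{
	struct ibv_parent_domain_init_attr attr = {
		.pd = pd,
		.comp_mask = IBV_PARENT_DOMAIN_INIT_ATTR_ALLOCATORS,
		.alloc = my_alloc,
		.free = my_free,
	};
	struct ibv_pd *pad = ibv_alloc_parent_domain(ctx, &attr);

	if (!pad) {
		if (errno == EOPNOTSUPP) {
			/* Provider lacks the feature: skip, not a bug. */
			fprintf(stderr, "parent domain comp_mask unsupported\n");
			return 0;
		}
		return -errno; /* anything else is a genuine error */
	}

	ibv_dealloc_pd(pad);
	return 0;
}
```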

0078-libhns-Add-pthread_spin_destroy-pthread_mutex_destro.patch

@@ -0,0 +1,216 @@
From 3a1432cfdb7c696d3acf97025e6d74bbf3e520dc Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Thu, 7 Dec 2023 09:48:00 +0800
Subject: [PATCH 78/80] libhns: Add
 pthread_spin_destroy()/pthread_mutex_destroy()

driver inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8MF59
--------------------------------------------------------------------------
The functions pthread_spin_destroy()/pthread_mutex_destroy()
correspond to pthread_spin_init()/pthread_mutex_init(). The driver
should call pthread_spin_destroy()/pthread_mutex_destroy() to clean
up resources before exiting.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u.c | 61 +++++++++++++++++++++++++++-----
providers/hns/hns_roce_u_hw_v2.c | 1 +
providers/hns/hns_roce_u_verbs.c | 17 ++++++---
3 files changed, 67 insertions(+), 12 deletions(-)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index f30486f..dfcd798 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -346,6 +346,47 @@ static int query_dev_attr(struct hns_roce_context *context,
return 0;
}
+static int hns_roce_init_context_lock(struct hns_roce_context *context)
+{
+ int ret;
+
+ ret = pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
+ if (ret)
+ return ret;
+
+ ret = pthread_mutex_init(&context->qp_table_mutex, NULL);
+ if (ret)
+ goto destroy_uar_lock;
+
+ ret = pthread_mutex_init(&context->srq_table_mutex, NULL);
+ if (ret)
+ goto destroy_qp_mutex;
+
+ ret = pthread_mutex_init(&context->db_list_mutex, NULL);
+ if (ret)
+ goto destroy_srq_mutex;
+
+ return 0;
+
+destroy_srq_mutex:
+ pthread_mutex_destroy(&context->srq_table_mutex);
+
+destroy_qp_mutex:
+ pthread_mutex_destroy(&context->qp_table_mutex);
+
+destroy_uar_lock:
+ pthread_spin_destroy(&context->uar_lock);
+ return ret;
+}
+
+static void hns_roce_destroy_context_lock(struct hns_roce_context *context)
+{
+ pthread_spin_destroy(&context->uar_lock);
+ pthread_mutex_destroy(&context->qp_table_mutex);
+ pthread_mutex_destroy(&context->srq_table_mutex);
+ pthread_mutex_destroy(&context->db_list_mutex);
+}
+
static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
int cmd_fd,
void *private_data)
@@ -365,7 +406,10 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
ucontext_set_cmd(&cmd, ctx_attr);
if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd),
&resp.ibv_resp, sizeof(resp)))
- goto err_free;
+ goto err_ibv_cmd;
+
+ if (hns_roce_init_context_lock(context))
+ goto err_ibv_cmd;
hr_dev->congest_type = resp.congest_type;
@@ -383,23 +427,23 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
context->qp_table_shift = calc_table_shift(resp.qp_tab_size,
HNS_ROCE_QP_TABLE_BITS);
context->qp_table_mask = (1 << context->qp_table_shift) - 1;
- pthread_mutex_init(&context->qp_table_mutex, NULL);
+
for (i = 0; i < HNS_ROCE_QP_TABLE_SIZE; ++i)
context->qp_table[i].refcnt = 0;
context->srq_table_shift = calc_table_shift(resp.srq_tab_size,
HNS_ROCE_SRQ_TABLE_BITS);
context->srq_table_mask = (1 << context->srq_table_shift) - 1;
- pthread_mutex_init(&context->srq_table_mutex, NULL);
+
for (i = 0; i < HNS_ROCE_SRQ_TABLE_SIZE; ++i)
context->srq_table[i].refcnt = 0;
if (query_dev_attr(context, hr_dev, &resp))
- goto err_free;
+ goto err_query_dev;
if (init_dca_context(context, cmd_fd,
&resp, ctx_attr, hr_dev->page_size))
- goto err_free;
+ goto err_query_dev;
if (init_reset_context(context, cmd_fd, &resp, hr_dev->page_size))
goto reset_free;
@@ -407,8 +451,6 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
if (hns_roce_mmap(hr_dev, context, cmd_fd))
goto uar_free;
- pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
-
verbs_set_ops(&context->ibv_ctx, &hns_common_ops);
verbs_set_ops(&context->ibv_ctx, &hr_dev->u_hw->hw_ops);
@@ -419,7 +461,9 @@ uar_free:
munmap(context->reset_state, hr_dev->page_size);
reset_free:
uninit_dca_context(context);
-err_free:
+err_query_dev:
+ hns_roce_destroy_context_lock(context);
+err_ibv_cmd:
verbs_uninit_context(&context->ibv_ctx);
free(context);
return NULL;
@@ -434,6 +478,7 @@ static void hns_roce_free_context(struct ibv_context *ibctx)
if (context->reset_state)
munmap(context->reset_state, hr_dev->page_size);
uninit_dca_context(context);
+ hns_roce_destroy_context_lock(context);
verbs_uninit_context(&context->ibv_ctx);
free(context);
}
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 68d7110..b2a8858 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -2049,6 +2049,7 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
hns_roce_unlock_cqs(to_hr_cq(ibqp->send_cq), to_hr_cq(ibqp->recv_cq));
hns_roce_free_qp_buf(qp, ctx);
+ hns_roce_qp_spinlock_destroy(qp);
free(qp);
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index fae6126..ba3fef6 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -803,6 +803,7 @@ int hns_roce_u_modify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr)
int hns_roce_u_destroy_cq(struct ibv_cq *cq)
{
+ struct hns_roce_cq *hr_cq = to_hr_cq(cq);
int ret;
ret = ibv_cmd_destroy_cq(cq);
@@ -811,10 +812,13 @@ int hns_roce_u_destroy_cq(struct ibv_cq *cq)
hns_roce_uninit_cq_swc(to_hr_cq(cq));
- hns_roce_free_db(to_hr_ctx(cq->context), to_hr_cq(cq)->db,
+ hns_roce_free_db(to_hr_ctx(cq->context), hr_cq->db,
HNS_ROCE_CQ_TYPE_DB);
- hns_roce_free_buf(&to_hr_cq(cq)->buf);
- free(to_hr_cq(cq));
+ hns_roce_free_buf(&hr_cq->buf);
+
+ hns_roce_spinlock_destroy(&hr_cq->hr_lock);
+
+ free(hr_cq);
return ret;
}
@@ -1071,7 +1075,7 @@ static struct ibv_srq *create_srq(struct ibv_context *context,
set_srq_param(context, srq, init_attr);
if (alloc_srq_buf(srq))
- goto err_free_srq;
+ goto err_destroy_lock;
srq->rdb = hns_roce_alloc_db(hr_ctx, HNS_ROCE_SRQ_TYPE_DB);
if (!srq->rdb)
@@ -1102,6 +1106,9 @@ err_srq_db:
err_srq_buf:
free_srq_buf(srq);
+err_destroy_lock:
+ hns_roce_spinlock_destroy(&srq->hr_lock);
+
err_free_srq:
free(srq);
@@ -1191,6 +1198,8 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq)
hns_roce_free_db(ctx, srq->rdb, HNS_ROCE_SRQ_TYPE_DB);
free_srq_buf(srq);
+
+ hns_roce_spinlock_destroy(&srq->hr_lock);
free(srq);
return 0;
--
2.25.1
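
The unwind scheme in hns_roce_init_context_lock() generalizes: destroy only the locks that were successfully initialized, in reverse order, since destroying an uninitialized lock is undefined behavior. A condensed, self-contained sketch of the same pattern (the struct and function names are illustrative, not the driver's):

```c
#include <pthread.h>

struct ctx_locks {
	pthread_spinlock_t uar_lock;
	pthread_mutex_t qp_table_mutex;
};

static int locks_init(struct ctx_locks *l)
{
	int ret;

	ret = pthread_spin_init(&l->uar_lock, PTHREAD_PROCESS_PRIVATE);
	if (ret)
		return ret;

	ret = pthread_mutex_init(&l->qp_table_mutex, NULL);
	if (ret)
		/* Unwind only the lock that did get initialized. */
		pthread_spin_destroy(&l->uar_lock);

	return ret;
}

static void locks_destroy(struct ctx_locks *l)
{
	/* Reverse order of initialization. */
	pthread_mutex_destroy(&l->qp_table_mutex);
	pthread_spin_destroy(&l->uar_lock);
}
```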

0079-libhns-Removes-a-repeated-initialization-of-a-spinlo.patch

@@ -0,0 +1,38 @@
From 8759b0e6ec4e73994743c1ae0d0ecc186688b6d6 Mon Sep 17 00:00:00 2001
From: Ran Zhou <zhouran10@h-partners.com>
Date: Thu, 7 Dec 2023 09:48:01 +0800
Subject: [PATCH 79/80] libhns: Removes a repeated initialization of a spinlock

driver inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8MF83
--------------------------------------------------------------------------
The pthread_spin_init() of the QP locks is already done in create_qp().
Remove the spinlock init from hns_roce_alloc_qp_buf() to avoid
initializing the locks twice.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Ran Zhou <zhouran10@h-partners.com>
---
providers/hns/hns_roce_u_verbs.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index ba3fef6..c404948 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -1933,10 +1933,6 @@ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr,
{
int ret;
- if (pthread_spin_init(&qp->sq.hr_lock.lock, PTHREAD_PROCESS_PRIVATE) ||
- pthread_spin_init(&qp->rq.hr_lock.lock, PTHREAD_PROCESS_PRIVATE))
- return -ENOMEM;
-
ret = qp_alloc_wqe(attr, hns_attr, qp, ctx);
if (ret)
return ret;
--
2.25.1
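
POSIX leaves the result of re-initializing an already-initialized spinlock undefined, which is why create_qp() must remain the single init point. A sketch of the assumed intended flow, simplified and not the driver's exact code (the qp_locks_init() helper is hypothetical; the lock fields match those in the removed hunk above):

```c
/* Sketch: create_qp() initializes both QP locks exactly once;
 * hns_roce_alloc_qp_buf() only allocates buffers and never touches
 * the locks again. */
static int qp_locks_init(struct hns_roce_qp *qp)
{
	if (pthread_spin_init(&qp->sq.hr_lock.lock, PTHREAD_PROCESS_PRIVATE))
		return -ENOMEM;

	if (pthread_spin_init(&qp->rq.hr_lock.lock, PTHREAD_PROCESS_PRIVATE)) {
		/* Unwind the first lock before reporting failure. */
		pthread_spin_destroy(&qp->sq.hr_lock.lock);
		return -ENOMEM;
	}

	return 0;
}
```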

0080-libhns-Fix-owner-bit-when-SQ-wraps-around-in-new-IO.patch

@@ -0,0 +1,95 @@
From 96d30f16bc03167c7c52e663785192382688f542 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Thu, 7 Dec 2023 09:48:02 +0800
Subject: [PATCH 80/80] libhns: Fix owner bit when SQ wraps around in new IO

driver inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8MF9Q
--------------------------------------------------------------------------
The owner bit is written in init_rc_wqe() or init_ud_wqe() with the
right value, but it can be overwritten by some subsequent operations.
When the SQ wraps around, the overwritten value will be incorrect.

For example, the driver assigns the owner bit in the second step and
overwrites it in the third step:
```c
ibv_wr_start();
ibv_wr_rdma_write();
if (inline)
	ibv_wr_set_inline_data_list();
else
	ibv_wr_set_sge_list();
ibv_wr_complete();
```
This patch removes the redundant owner bit assignment operations
in new IO.
Fixes: 36446a56eea5 ("libhns: Extended QP supports the new post send mechanism")
Fixes: 163d62ca6196 ("libhns: Fix the owner bit error of sq in new io")
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 7 -------
1 file changed, 7 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index b2a8858..acbc854 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -2544,8 +2544,6 @@ static void wr_set_sge_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_sge,
wqe->msg_len = htole32(qp->sge_info.total_len);
hr_reg_write(wqe, RCWQE_SGE_NUM, qp->sge_info.valid_num);
-
- enable_wqe(qp, wqe, qp->sq.head);
}
static void wr_send_rc(struct ibv_qp_ex *ibv_qp)
@@ -2737,7 +2735,6 @@ static void wr_set_inline_data_rc(struct ibv_qp_ex *ibv_qp, void *addr,
qp->sge_info.total_len = length;
set_inline_data_list_rc(qp, wqe, 1, &buff);
- enable_wqe(qp, wqe, qp->sq.head);
}
static void wr_set_inline_data_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_buf,
@@ -2755,7 +2752,6 @@ static void wr_set_inline_data_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_buf,
qp->sge_info.total_len += buf_list[i].length;
set_inline_data_list_rc(qp, wqe, num_buf, buf_list);
- enable_wqe(qp, wqe, qp->sq.head);
}
static struct hns_roce_ud_sq_wqe *
@@ -2892,7 +2888,6 @@ static void wr_set_sge_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_sge,
hr_reg_write(wqe, UDWQE_SGE_NUM, cnt);
qp->sge_info.start_idx += cnt;
- enable_wqe(qp, wqe, qp->sq.head);
}
static void set_inline_data_list_ud(struct hns_roce_qp *qp,
@@ -2958,7 +2953,6 @@ static void wr_set_inline_data_ud(struct ibv_qp_ex *ibv_qp, void *addr,
qp->sge_info.total_len = length;
set_inline_data_list_ud(qp, wqe, 1, &buff);
- enable_wqe(qp, wqe, qp->sq.head);
}
static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf,
@@ -2976,7 +2970,6 @@ static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf,
qp->sge_info.total_len += buf_list[i].length;
set_inline_data_list_ud(qp, wqe, num_buf, buf_list);
- enable_wqe(qp, wqe, qp->sq.head);
}
static void wr_start(struct ibv_qp_ex *ibv_qp)
--
2.25.1
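
The underlying invariant: the owner (valid) bit's polarity flips each time the SQ wraps, so hardware can distinguish freshly posted WQEs from stale ones left by the previous pass, and it must be computed from sq.head once, when the WQE is initialized. A sketch of the idea; this is an assumption about the scheme, not the driver's exact code:

```c
/* sq.head counts every WQE ever posted; head >> shift is the number of
 * completed wraps of a 2^shift-entry queue. The polarity alternates
 * with each wrap, so recomputing the bit later with an advanced head
 * (as the removed enable_wqe() calls did) can yield the wrong value. */
static inline unsigned int owner_bit(unsigned int head, unsigned int shift)
{
	return ~(head >> shift) & 0x1;
}
```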

rdma-core.spec

@@ -1,6 +1,6 @@
Name: rdma-core
Version: 41.0
-Release: 22
+Release: 23
Summary: RDMA core userspace libraries and daemons
License: GPLv2 or BSD
Url: https://github.com/linux-rdma/rdma-core
@@ -82,6 +82,10 @@ patch73: 0073-libhns-Fix-uninitialized-qp-attr-when-flush-cqe.patch
patch74: 0074-libhns-Fix-possible-overflow-in-cq-clean.patch
patch75: 0075-libhns-Fix-unnecessary-dca-memory-detach.patch
patch76: 0076-libhns-Bugfix-for-wrong-timing-of-modifying-ibv_qp-s.patch
+patch77: 0077-libhns-Fix-parent-domain-unsupported-comp-mask.patch
+patch78: 0078-libhns-Add-pthread_spin_destroy-pthread_mutex_destro.patch
+patch79: 0079-libhns-Removes-a-repeated-initialization-of-a-spinlo.patch
+patch80: 0080-libhns-Fix-owner-bit-when-SQ-wraps-around-in-new-IO.patch
BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0)
BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel
@@ -329,6 +333,12 @@ fi
%{_mandir}/*
%changelog
+* Thu Dec 7 2023 Ran Zhou <zhouran10@h-partners.com> - 41.0-23
+- Type: bugfix
+- ID: NA
+- SUG: NA
+- DESC: Bugfix for lock and owner bit
* Fri Dec 1 2023 Ran Zhou <zhouran10@h-partners.com> - 41.0-22
- Type: bugfix
- ID: NA