!86 [sync] PR-68: Backport bugfixes for hns
From: @openeuler-sync-bot Reviewed-by: @hellotcc Signed-off-by: @hellotcc
This commit is contained in:
commit
3e19c62071
69
0047-libhns-Fix-the-owner-bit-error-of-sq-in-new-io.patch
Normal file
69
0047-libhns-Fix-the-owner-bit-error-of-sq-in-new-io.patch
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
From a86a120c35b1112bcef6c3821c2e5e1910e615e9 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Luoyouming <luoyouming@huawei.com>
|
||||||
|
Date: Fri, 2 Jun 2023 10:33:14 +0800
|
||||||
|
Subject: [PATCH 2/4] libhns: Fix the owner bit error of sq in new io
|
||||||
|
|
||||||
|
driver inclusion
|
||||||
|
category: bugfix
|
||||||
|
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I7A5Y5
|
||||||
|
|
||||||
|
---------------------------------------------------------------
|
||||||
|
|
||||||
|
The code does not use the head position of sq to set the owner bit,
|
||||||
|
but uses the head after adding 1, which causes an owner bit error. When
|
||||||
|
the wqe queue has not been flipped, the hardware has flipped based
|
||||||
|
on the owner bit judgment, resulting in failure to obtain wqe,
|
||||||
|
unable to send, and unable to generate cqe. This patch will set the
|
||||||
|
owner bit ahead of time before the head value increases.
|
||||||
|
|
||||||
|
Fixes: 36446a56eea5 ("libhns: Extended QP supports the new post send mechanism")
|
||||||
|
Signed-off-by: Luoyouming <luoyouming@huawei.com>
|
||||||
|
---
|
||||||
|
providers/hns/hns_roce_u_hw_v2.c | 10 ++++++----
|
||||||
|
1 file changed, 6 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||||
|
index 616d1ea..cde4801 100644
|
||||||
|
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||||
|
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||||
|
@@ -2215,6 +2215,9 @@ init_rc_wqe(struct hns_roce_qp *qp, uint64_t wr_id, unsigned int opcode)
|
||||||
|
|
||||||
|
qp->sq.wrid[wqe_idx] = wr_id;
|
||||||
|
qp->cur_wqe = wqe;
|
||||||
|
+
|
||||||
|
+ enable_wqe(qp, wqe, qp->sq.head);
|
||||||
|
+
|
||||||
|
qp->sq.head++;
|
||||||
|
|
||||||
|
return wqe;
|
||||||
|
@@ -2236,9 +2239,6 @@ static void wr_set_sge_rc(struct ibv_qp_ex *ibv_qp, uint32_t lkey,
|
||||||
|
wqe->msg_len = htole32(length);
|
||||||
|
hr_reg_write(wqe, RCWQE_LEN0, length);
|
||||||
|
hr_reg_write(wqe, RCWQE_SGE_NUM, !!length);
|
||||||
|
- /* ignore ex sge start index */
|
||||||
|
-
|
||||||
|
- enable_wqe(qp, wqe, qp->sq.head);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void set_sgl_rc(struct hns_roce_v2_wqe_data_seg *dseg,
|
||||||
|
@@ -2541,6 +2541,9 @@ init_ud_wqe(struct hns_roce_qp *qp, uint64_t wr_id, unsigned int opcode)
|
||||||
|
|
||||||
|
qp->sq.wrid[wqe_idx] = wr_id;
|
||||||
|
qp->cur_wqe = wqe;
|
||||||
|
+
|
||||||
|
+ enable_wqe(qp, wqe, qp->sq.head);
|
||||||
|
+
|
||||||
|
qp->sq.head++;
|
||||||
|
|
||||||
|
return wqe;
|
||||||
|
@@ -2610,7 +2613,6 @@ static void wr_set_sge_ud(struct ibv_qp_ex *ibv_qp, uint32_t lkey,
|
||||||
|
dseg->len = htole32(length);
|
||||||
|
|
||||||
|
qp->sge_info.start_idx++;
|
||||||
|
- enable_wqe(qp, wqe, qp->sq.head);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void wr_set_sge_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_sge,
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
||||||
@ -0,0 +1,49 @@
|
|||||||
|
From cfea6efe6decfa8c209ad9a85e1290674370725e Mon Sep 17 00:00:00 2001
|
||||||
|
From: Junxian Huang <huangjunxian6@hisilicon.com>
|
||||||
|
Date: Fri, 2 Jun 2023 10:33:15 +0800
|
||||||
|
Subject: [PATCH 3/4] libhns: Fix incorrect post-send with direct wqe of
|
||||||
|
wr-list in user space
|
||||||
|
|
||||||
|
driver inclusion
|
||||||
|
category: bugfix
|
||||||
|
bugzilla: https://gitee.com/openeuler/kernel/issues/I7A2SA
|
||||||
|
|
||||||
|
---------------------------------------------------------------
|
||||||
|
|
||||||
|
Currently, direct wqe is not supported for wr-list. RoCE driver excludes
|
||||||
|
direct wqe for wr-list by judging whether the number of wr is 1.
|
||||||
|
|
||||||
|
For a wr-list where the second wr is a length-error atomic wr, the
|
||||||
|
post-send driver handles the first wr and adds 1 to the wr number counter
|
||||||
|
first. While handling the second wr, the driver finds a length error
|
||||||
|
and terminates the wr handling process, leaving the counter at 1. This
|
||||||
|
causes the driver to mistakenly judge there is only 1 wr and thus enter
|
||||||
|
the direct wqe process, carrying the current length-error atomic wqe.
|
||||||
|
|
||||||
|
This patch fixes the error by adding a judgement whether the current wr
|
||||||
|
is a bad wr. If so, use the normal doorbell process instead of direct wqe
|
||||||
|
even though the wr number is 1.
|
||||||
|
|
||||||
|
Fixes: 159933c37450 ("libhns: Add support for direct wqe")
|
||||||
|
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
|
||||||
|
---
|
||||||
|
providers/hns/hns_roce_u_hw_v2.c | 3 ++-
|
||||||
|
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||||
|
index cde4801..bb26c59 100644
|
||||||
|
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||||
|
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||||
|
@@ -1480,7 +1480,8 @@ out:
|
||||||
|
|
||||||
|
udma_to_device_barrier();
|
||||||
|
|
||||||
|
- if (nreq == 1 && (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
|
||||||
|
+ if (nreq == 1 && !ret &&
|
||||||
|
+ (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
|
||||||
|
hns_roce_write_dwqe(qp, wqe);
|
||||||
|
else
|
||||||
|
hns_roce_update_sq_db(ctx, qp);
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
||||||
@ -0,0 +1,41 @@
|
|||||||
|
From 8fbf781e3b3630c25a361f7c5e3642350dcd21c9 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Chengchang Tang <tangchengchang@huawei.com>
|
||||||
|
Date: Fri, 2 Jun 2023 10:33:16 +0800
|
||||||
|
Subject: [PATCH 4/4] libhns: Add a judgment to the congestion control
|
||||||
|
algorithm
|
||||||
|
|
||||||
|
driver inclusion
|
||||||
|
category: bugfix
|
||||||
|
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I7A7HI
|
||||||
|
|
||||||
|
---------------------------------------------------------------
|
||||||
|
|
||||||
|
The congestion control algorithm is used only when the comp_mask flag
|
||||||
|
HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE is set.
|
||||||
|
|
||||||
|
A check on comp_mask is added to prevent invalid parameter errors caused
|
||||||
|
by unconfigured congestion control algorithm types.
|
||||||
|
|
||||||
|
Fixes: 7623f24781f1 ("libhns: Support congestion control algorithm configuration")
|
||||||
|
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
|
||||||
|
---
|
||||||
|
providers/hns/hns_roce_u_verbs.c | 3 +++
|
||||||
|
1 file changed, 3 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||||
|
index 6c6120c..fa27fc1 100644
|
||||||
|
--- a/providers/hns/hns_roce_u_verbs.c
|
||||||
|
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||||
|
@@ -988,6 +988,9 @@ static int check_qp_congest_type(struct hns_roce_context *ctx,
|
||||||
|
{
|
||||||
|
struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device);
|
||||||
|
|
||||||
|
+ if (!(hns_attr->comp_mask & HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE))
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
if (!check_comp_mask(hns_attr->congest_type, hr_dev->congest_type)) {
|
||||||
|
verbs_err(&ctx->ibv_ctx, "unsupported congest type 0x%x.\n",
|
||||||
|
hns_attr->congest_type);
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
||||||
@ -1,6 +1,6 @@
|
|||||||
Name: rdma-core
|
Name: rdma-core
|
||||||
Version: 41.0
|
Version: 41.0
|
||||||
Release: 11
|
Release: 12
|
||||||
Summary: RDMA core userspace libraries and daemons
|
Summary: RDMA core userspace libraries and daemons
|
||||||
License: GPLv2 or BSD
|
License: GPLv2 or BSD
|
||||||
Url: https://github.com/linux-rdma/rdma-core
|
Url: https://github.com/linux-rdma/rdma-core
|
||||||
@ -52,6 +52,9 @@ Patch42: 0043-libhns-Add-support-for-SVE-Direct-WQE.patch
|
|||||||
Patch43: 0044-libhns-Fix-the-sge-num-problem-of-atomic-op.patch
|
Patch43: 0044-libhns-Fix-the-sge-num-problem-of-atomic-op.patch
|
||||||
Patch44: 0045-libhns-Fix-sge-tail_len-overflow.patch
|
Patch44: 0045-libhns-Fix-sge-tail_len-overflow.patch
|
||||||
Patch45: 0046-libhns-Disable-local-invalidate-operation.patch
|
Patch45: 0046-libhns-Disable-local-invalidate-operation.patch
|
||||||
|
Patch46: 0047-libhns-Fix-the-owner-bit-error-of-sq-in-new-io.patch
|
||||||
|
Patch47: 0048-libhns-Fix-incorrect-post-send-with-direct-wqe-of-wr.patch
|
||||||
|
Patch48: 0049-libhns-Add-a-judgment-to-the-congestion-control-algo.patch
|
||||||
|
|
||||||
BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0)
|
BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0)
|
||||||
BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel
|
BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel
|
||||||
@ -299,6 +302,12 @@ fi
|
|||||||
%{_mandir}/*
|
%{_mandir}/*
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Fri Jun 2 2023 Juan Zhou <zhoujuan51@h-partners.com> - 41.0-12
|
||||||
|
- Type: bugfix
|
||||||
|
- ID: NA
|
||||||
|
- SUG: NA
|
||||||
|
- DESC: Backport bugfix for hns
|
||||||
|
|
||||||
* Thu May 11 2023 Juan Zhou <zhoujuan51@h-partners.com> - 41.0-11
|
* Thu May 11 2023 Juan Zhou <zhoujuan51@h-partners.com> - 41.0-11
|
||||||
- Type: bugfix
|
- Type: bugfix
|
||||||
- ID: NA
|
- ID: NA
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user