This series fixes three problems in the libhns provider:

1. The hns hardware logic requires wr->num_sge to be 1 when performing atomic operations. The code did not check this condition; the current patch adds this constraint.
2. In the SQ inline scenario, when num_sge in post_send is not 1, the sge array is walked in the for loop without rotation, and the copy goes directly out of bounds.
3. The local invalidate operation currently does not work properly, so disable it for the time being. HIP08 and HIP09 hardware does not support this feature, so the associated code is deleted.

Signed-off-by: Juan Zhou <zhoujuan51@h-partners.com>
(cherry picked from commit 43c14b73409cf6e63278d5ff68e2694e592e9015)
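For context, the constraint in item 1 means a caller-side atomic work request must carry exactly one sge describing an 8-byte buffer. Below is a minimal sketch using only the standard libibverbs API; it is not part of the patch, and the helper name and the qp, local_buf, local_mr, remote_addr and remote_rkey parameters are illustrative, assumed to be set up elsewhere:

    #include <infiniband/verbs.h>

    static int post_fetch_add(struct ibv_qp *qp, uint64_t *local_buf,
                              struct ibv_mr *local_mr,
                              uint64_t remote_addr, uint32_t remote_rkey)
    {
            struct ibv_sge sge = {
                    .addr   = (uintptr_t)local_buf, /* old remote value lands here */
                    .length = sizeof(uint64_t),     /* standard atomics are 8 bytes */
                    .lkey   = local_mr->lkey,
            };
            struct ibv_send_wr wr = {0}, *bad_wr;

            wr.opcode = IBV_WR_ATOMIC_FETCH_AND_ADD;
            wr.sg_list = &sge;
            wr.num_sge = 1;                 /* hns requires exactly one sge here */
            wr.send_flags = IBV_SEND_SIGNALED;
            /* remote_addr must be 8-byte aligned per the IB spec */
            wr.wr.atomic.remote_addr = remote_addr;
            wr.wr.atomic.rkey = remote_rkey;
            wr.wr.atomic.compare_add = 1;   /* the value to add */

            return ibv_post_send(qp, &wr, &bad_wr);
    }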
From b5127a009336e0e6947433148c6c7422c277bce7 Mon Sep 17 00:00:00 2001
From: Luoyouming <luoyouming@huawei.com>
Date: Sat, 6 May 2023 18:06:38 +0800
Subject: [PATCH 1/3] libhns: Fix the sge num problem of atomic op

mainline inclusion
commit b4793235
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I72EWP
CVE: NA

----------------------------------------------------------------------

The hns hardware logic requires wr->num_sge to be 1 when
performing atomic operations. The code does not check this
condition, so the current patch adds this constraint.

Fixes: 3507f87f7760 ("libhns: Optimize set_sge process")
Fixes: 36446a56eea5 ("libhns: Extended QP supports the new post send mechanism")
Signed-off-by: Luoyouming <luoyouming@huawei.com>
Signed-off-by: Zhou Juan <nnuzj07170227@163.com>
---
 providers/hns/hns_roce_u_hw_v2.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index a49b50d..5533cdb 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -106,6 +106,9 @@ static int set_atomic_seg(struct hns_roce_qp *qp, struct ibv_send_wr *wr,
 	void *buf[ATOMIC_BUF_NUM_MAX];
 	unsigned int buf_sge_num;
 
+	/* There is only one sge in atomic wr, and data_len is the data length
+	 * in the first sge
+	 */
 	if (is_std_atomic(data_len)) {
 		if (wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP) {
 			aseg->fetchadd_swap_data = htole64(wr->wr.atomic.swap);
@@ -923,16 +926,19 @@ static void set_rc_sge(struct hns_roce_v2_wqe_data_seg *dseg,
 	uint32_t mask = qp->ex_sge.sge_cnt - 1;
 	uint32_t index = sge_info->start_idx;
 	struct ibv_sge *sge = wr->sg_list;
+	int total_sge = wr->num_sge;
+	bool flag = false;
 	uint32_t len = 0;
 	uint32_t cnt = 0;
-	int flag;
 	int i;
 
-	flag = (wr->send_flags & IBV_SEND_INLINE &&
-		wr->opcode != IBV_WR_ATOMIC_FETCH_AND_ADD &&
-		wr->opcode != IBV_WR_ATOMIC_CMP_AND_SWP);
+	if (wr->opcode == IBV_WR_ATOMIC_FETCH_AND_ADD ||
+	    wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP)
+		total_sge = 1;
+	else
+		flag = !!(wr->send_flags & IBV_SEND_INLINE);
 
-	for (i = 0; i < wr->num_sge; i++, sge++) {
+	for (i = 0; i < total_sge; i++, sge++) {
 		if (unlikely(!sge->length))
 			continue;
 
@@ -2267,6 +2273,7 @@ static void wr_set_sge_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_sge,
 	struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
 	struct hns_roce_rc_sq_wqe *wqe = qp->cur_wqe;
 	struct hns_roce_v2_wqe_data_seg *dseg;
+	uint32_t opcode;
 
 	if (!wqe)
 		return;
@@ -2276,9 +2283,15 @@ static void wr_set_sge_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_sge,
 		return;
 	}
 
+
 	hr_reg_write(wqe, RCWQE_MSG_START_SGE_IDX,
 		     qp->sge_info.start_idx & (qp->ex_sge.sge_cnt - 1));
 
+	opcode = hr_reg_read(wqe, RCWQE_OPCODE);
+	if (opcode == HNS_ROCE_WQE_OP_ATOMIC_COM_AND_SWAP ||
+	    opcode == HNS_ROCE_WQE_OP_ATOMIC_FETCH_AND_ADD)
+		num_sge = 1;
+
 	dseg = (void *)(wqe + 1);
 	set_sgl_rc(dseg, qp, sg_list, num_sge);
 
-- 
2.25.1
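The wr_set_sge_list_rc() hunk applies the same clamp on the extended post-send path: the opcode is written into the WQE before the sge list is attached, so the provider can force num_sge to 1 for atomic opcodes. The sketch below shows how that path is typically driven from user space with the standard libibverbs extended-QP API; it assumes (setup not shown) a QP created via ibv_create_qp_ex() with IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD in send_ops_flags, and the function and parameter names are illustrative:

    #include <infiniband/verbs.h>

    static int post_fetch_add_ex(struct ibv_qp_ex *qpx, uint64_t *local_buf,
                                 struct ibv_mr *local_mr,
                                 uint64_t remote_addr, uint32_t remote_rkey)
    {
            struct ibv_sge sge = {
                    .addr   = (uintptr_t)local_buf,
                    .length = sizeof(uint64_t),     /* standard atomics are 8 bytes */
                    .lkey   = local_mr->lkey,
            };

            ibv_wr_start(qpx);
            qpx->wr_id = 1;
            qpx->wr_flags = IBV_SEND_SIGNALED;
            /* Opcode first: wr_set_sge_list_rc() reads it back from the WQE
             * and, after this fix, ignores any sges beyond the first. */
            ibv_wr_atomic_fetch_add(qpx, remote_rkey, remote_addr, 1);
            ibv_wr_set_sge_list(qpx, 1, &sge);
            return ibv_wr_complete(qpx);
    }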