Bugfix for hns SRQ and SGE, along with all related cleanups and refactorings. Signed-off-by: zhengfeng luo <luozhengfeng@h-partners.com> Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
140 lines
4.1 KiB
Diff
140 lines
4.1 KiB
Diff
From 3507f87f776043acd238d7c0c41cc3511f186d08 Mon Sep 17 00:00:00 2001
|
|
From: Lang Cheng <chenglang@huawei.com>
|
|
Date: Fri, 14 May 2021 10:02:57 +0800
|
|
Subject: libhns: Optimize set_sge process
|
|
|
|
Use local variables to avoid frequent ldr/str operations. And because UD's
|
|
process of setting sge is simpler than that of RC, set_sge() can be split
|
|
into two functions for compiler optimization.
|
|
|
|
Signed-off-by: Lang Cheng <chenglang@huawei.com>
|
|
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
|
---
|
|
providers/hns/hns_roce_u_hw_v2.c | 83 +++++++++++++++++++++++---------
|
|
1 file changed, 61 insertions(+), 22 deletions(-)
|
|
|
|
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
|
index 4988943a..dc79a6f8 100644
|
|
--- a/providers/hns/hns_roce_u_hw_v2.c
|
|
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
|
@@ -701,39 +701,78 @@ static int check_qp_send(struct ibv_qp *qp, struct hns_roce_context *ctx)
|
|
return 0;
|
|
}
|
|
|
|
-static void set_sge(struct hns_roce_v2_wqe_data_seg *dseg,
|
|
- struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
|
- struct hns_roce_sge_info *sge_info)
|
|
+static void set_rc_sge(struct hns_roce_v2_wqe_data_seg *dseg,
|
|
+ struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
|
+ struct hns_roce_sge_info *sge_info)
|
|
{
|
|
+ uint32_t mask = qp->ex_sge.sge_cnt - 1;
|
|
+ uint32_t index = sge_info->start_idx;
|
|
+ struct ibv_sge *sge = wr->sg_list;
|
|
+ uint32_t len = 0;
|
|
+ uint32_t cnt = 0;
|
|
+ int flag;
|
|
int i;
|
|
|
|
- sge_info->valid_num = 0;
|
|
- sge_info->total_len = 0;
|
|
+ flag = (wr->send_flags & IBV_SEND_INLINE &&
|
|
+ wr->opcode != IBV_WR_ATOMIC_FETCH_AND_ADD &&
|
|
+ wr->opcode != IBV_WR_ATOMIC_CMP_AND_SWP);
|
|
|
|
- for (i = 0; i < wr->num_sge; i++) {
|
|
- if (unlikely(!wr->sg_list[i].length))
|
|
+ for (i = 0; i < wr->num_sge; i++, sge++) {
|
|
+ if (unlikely(!sge->length))
|
|
continue;
|
|
|
|
- sge_info->total_len += wr->sg_list[i].length;
|
|
- sge_info->valid_num++;
|
|
+ len += sge->length;
|
|
+ cnt++;
|
|
|
|
- if (wr->send_flags & IBV_SEND_INLINE &&
|
|
- wr->opcode != IBV_WR_ATOMIC_FETCH_AND_ADD &&
|
|
- wr->opcode != IBV_WR_ATOMIC_CMP_AND_SWP)
|
|
+ if (flag)
|
|
continue;
|
|
|
|
- /* No inner sge in UD wqe */
|
|
- if (sge_info->valid_num <= HNS_ROCE_SGE_IN_WQE &&
|
|
- qp->verbs_qp.qp.qp_type != IBV_QPT_UD) {
|
|
- set_data_seg_v2(dseg, wr->sg_list + i);
|
|
+ if (cnt <= HNS_ROCE_SGE_IN_WQE) {
|
|
+ set_data_seg_v2(dseg, sge);
|
|
dseg++;
|
|
} else {
|
|
- dseg = get_send_sge_ex(qp, sge_info->start_idx &
|
|
- (qp->ex_sge.sge_cnt - 1));
|
|
- set_data_seg_v2(dseg, wr->sg_list + i);
|
|
- sge_info->start_idx++;
|
|
+ dseg = get_send_sge_ex(qp, index & mask);
|
|
+ set_data_seg_v2(dseg, sge);
|
|
+ index++;
|
|
}
|
|
}
|
|
+
|
|
+ sge_info->start_idx = index;
|
|
+ sge_info->valid_num = cnt;
|
|
+ sge_info->total_len = len;
|
|
+}
|
|
+
|
|
+static void set_ud_sge(struct hns_roce_v2_wqe_data_seg *dseg,
|
|
+ struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
|
+ struct hns_roce_sge_info *sge_info)
|
|
+{
|
|
+ int flag = wr->send_flags & IBV_SEND_INLINE;
|
|
+ uint32_t mask = qp->ex_sge.sge_cnt - 1;
|
|
+ uint32_t index = sge_info->start_idx;
|
|
+ struct ibv_sge *sge = wr->sg_list;
|
|
+ uint32_t len = 0;
|
|
+ uint32_t cnt = 0;
|
|
+ int i;
|
|
+
|
|
+ for (i = 0; i < wr->num_sge; i++, sge++) {
|
|
+ if (unlikely(!sge->length))
|
|
+ continue;
|
|
+
|
|
+ len += sge->length;
|
|
+ cnt++;
|
|
+
|
|
+ if (flag)
|
|
+ continue;
|
|
+
|
|
+ /* No inner sge in UD wqe */
|
|
+ dseg = get_send_sge_ex(qp, index & mask);
|
|
+ set_data_seg_v2(dseg, sge);
|
|
+ index++;
|
|
+ }
|
|
+
|
|
+ sge_info->start_idx = index;
|
|
+ sge_info->valid_num = cnt;
|
|
+ sge_info->total_len = len;
|
|
}
|
|
|
|
static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
|
|
@@ -910,7 +949,7 @@ static int fill_ud_data_seg(struct hns_roce_ud_sq_wqe *ud_sq_wqe,
|
|
UD_SQ_WQE_MSG_START_SGE_IDX_S,
|
|
sge_info->start_idx & (qp->ex_sge.sge_cnt - 1));
|
|
|
|
- set_sge((struct hns_roce_v2_wqe_data_seg *)ud_sq_wqe, qp, wr, sge_info);
|
|
+ set_ud_sge((struct hns_roce_v2_wqe_data_seg *)ud_sq_wqe, qp, wr, sge_info);
|
|
|
|
ud_sq_wqe->msg_len = htole32(sge_info->total_len);
|
|
|
|
@@ -1111,7 +1150,7 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
|
wqe += sizeof(struct hns_roce_rc_sq_wqe);
|
|
dseg = wqe;
|
|
|
|
- set_sge(dseg, qp, wr, sge_info);
|
|
+ set_rc_sge(dseg, qp, wr, sge_info);
|
|
|
|
rc_sq_wqe->msg_len = htole32(sge_info->total_len);
|
|
|
|
--
|
|
2.30.0
|
|
|