rdma-core/0016-libhns-Optimize-set_sge-process.patch

140 lines
4.1 KiB
Diff
Raw Normal View History

From 3507f87f776043acd238d7c0c41cc3511f186d08 Mon Sep 17 00:00:00 2001
From: Lang Cheng <chenglang@huawei.com>
Date: Fri, 14 May 2021 10:02:57 +0800
Subject: libhns: Optimize set_sge process
Use local variables to avoid frequent ldr/str operations. And because UD's
process of setting sge is simpler than RC's, set_sge() can be split
into two functions for compiler optimization.
Signed-off-by: Lang Cheng <chenglang@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 83 +++++++++++++++++++++++---------
1 file changed, 61 insertions(+), 22 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 4988943a..dc79a6f8 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -701,39 +701,78 @@ static int check_qp_send(struct ibv_qp *qp, struct hns_roce_context *ctx)
return 0;
}
-static void set_sge(struct hns_roce_v2_wqe_data_seg *dseg,
- struct hns_roce_qp *qp, struct ibv_send_wr *wr,
- struct hns_roce_sge_info *sge_info)
+static void set_rc_sge(struct hns_roce_v2_wqe_data_seg *dseg,
+ struct hns_roce_qp *qp, struct ibv_send_wr *wr,
+ struct hns_roce_sge_info *sge_info)
{
+ uint32_t mask = qp->ex_sge.sge_cnt - 1;
+ uint32_t index = sge_info->start_idx;
+ struct ibv_sge *sge = wr->sg_list;
+ uint32_t len = 0;
+ uint32_t cnt = 0;
+ int flag;
int i;
- sge_info->valid_num = 0;
- sge_info->total_len = 0;
+ flag = (wr->send_flags & IBV_SEND_INLINE &&
+ wr->opcode != IBV_WR_ATOMIC_FETCH_AND_ADD &&
+ wr->opcode != IBV_WR_ATOMIC_CMP_AND_SWP);
- for (i = 0; i < wr->num_sge; i++) {
- if (unlikely(!wr->sg_list[i].length))
+ for (i = 0; i < wr->num_sge; i++, sge++) {
+ if (unlikely(!sge->length))
continue;
- sge_info->total_len += wr->sg_list[i].length;
- sge_info->valid_num++;
+ len += sge->length;
+ cnt++;
- if (wr->send_flags & IBV_SEND_INLINE &&
- wr->opcode != IBV_WR_ATOMIC_FETCH_AND_ADD &&
- wr->opcode != IBV_WR_ATOMIC_CMP_AND_SWP)
+ if (flag)
continue;
- /* No inner sge in UD wqe */
- if (sge_info->valid_num <= HNS_ROCE_SGE_IN_WQE &&
- qp->verbs_qp.qp.qp_type != IBV_QPT_UD) {
- set_data_seg_v2(dseg, wr->sg_list + i);
+ if (cnt <= HNS_ROCE_SGE_IN_WQE) {
+ set_data_seg_v2(dseg, sge);
dseg++;
} else {
- dseg = get_send_sge_ex(qp, sge_info->start_idx &
- (qp->ex_sge.sge_cnt - 1));
- set_data_seg_v2(dseg, wr->sg_list + i);
- sge_info->start_idx++;
+ dseg = get_send_sge_ex(qp, index & mask);
+ set_data_seg_v2(dseg, sge);
+ index++;
}
}
+
+ sge_info->start_idx = index;
+ sge_info->valid_num = cnt;
+ sge_info->total_len = len;
+}
+
+static void set_ud_sge(struct hns_roce_v2_wqe_data_seg *dseg,
+ struct hns_roce_qp *qp, struct ibv_send_wr *wr,
+ struct hns_roce_sge_info *sge_info)
+{
+ int flag = wr->send_flags & IBV_SEND_INLINE;
+ uint32_t mask = qp->ex_sge.sge_cnt - 1;
+ uint32_t index = sge_info->start_idx;
+ struct ibv_sge *sge = wr->sg_list;
+ uint32_t len = 0;
+ uint32_t cnt = 0;
+ int i;
+
+ for (i = 0; i < wr->num_sge; i++, sge++) {
+ if (unlikely(!sge->length))
+ continue;
+
+ len += sge->length;
+ cnt++;
+
+ if (flag)
+ continue;
+
+ /* No inner sge in UD wqe */
+ dseg = get_send_sge_ex(qp, index & mask);
+ set_data_seg_v2(dseg, sge);
+ index++;
+ }
+
+ sge_info->start_idx = index;
+ sge_info->valid_num = cnt;
+ sge_info->total_len = len;
}
static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
@@ -910,7 +949,7 @@ static int fill_ud_data_seg(struct hns_roce_ud_sq_wqe *ud_sq_wqe,
UD_SQ_WQE_MSG_START_SGE_IDX_S,
sge_info->start_idx & (qp->ex_sge.sge_cnt - 1));
- set_sge((struct hns_roce_v2_wqe_data_seg *)ud_sq_wqe, qp, wr, sge_info);
+ set_ud_sge((struct hns_roce_v2_wqe_data_seg *)ud_sq_wqe, qp, wr, sge_info);
ud_sq_wqe->msg_len = htole32(sge_info->total_len);
@@ -1111,7 +1150,7 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
wqe += sizeof(struct hns_roce_rc_sq_wqe);
dseg = wqe;
- set_sge(dseg, qp, wr, sge_info);
+ set_rc_sge(dseg, qp, wr, sge_info);
rc_sq_wqe->msg_len = htole32(sge_info->total_len);
--
2.30.0